[packages/percona-server/v5.0.x: 138/202] - 5.0.87-b20

glen glen at pld-linux.org
Wed Oct 21 16:20:38 CEST 2015


commit 4b771f0e4d98c4a7193f4e75e2970910bb2558a4
Author: Elan Ruusamäe <glen at pld-linux.org>
Date:   Tue Dec 22 07:33:35 2009 +0000

    - 5.0.87-b20
    
    Changed files:
        mysql-innodb_rw_lock.patch -> 1.1.2.5

 mysql-innodb_rw_lock.patch | 2937 ++++++++++++++++++++++++++++++--------------
 1 file changed, 2030 insertions(+), 907 deletions(-)
---
diff --git a/mysql-innodb_rw_lock.patch b/mysql-innodb_rw_lock.patch
index b4a1a79..a509f70 100644
--- a/mysql-innodb_rw_lock.patch
+++ b/mysql-innodb_rw_lock.patch
@@ -1,215 +1,550 @@
+diff -ruN a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
+--- a/innobase/btr/btr0cur.c	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/btr/btr0cur.c	2009-10-22 15:18:44.000000000 +0900
+@@ -313,7 +313,7 @@
+ #ifdef UNIV_SEARCH_PERF_STAT
+ 	info->n_searches++;
+ #endif	
+-	if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
++	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
+ 		&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
+ 		&& !estimate
+ #ifdef PAGE_CUR_LE_OR_EXTENDS
 diff -ruN a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
---- a/innobase/btr/btr0sea.c	2009-05-20 14:21:44.000000000 +0900
-+++ b/innobase/btr/btr0sea.c	2009-05-20 14:39:34.000000000 +0900
-@@ -773,7 +773,7 @@
+--- a/innobase/btr/btr0sea.c	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/btr/btr0sea.c	2009-10-22 15:18:44.000000000 +0900
+@@ -773,8 +773,8 @@
  		rw_lock_s_lock(&btr_search_latch);
  	}
  
 -	ut_ad(btr_search_latch.writer != RW_LOCK_EX);
-+	ut_ad(btr_search_latch.writer_count == 0);
- 	ut_ad(btr_search_latch.reader_count > 0);
+-	ut_ad(btr_search_latch.reader_count > 0);
++	ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
++	ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
  
  	rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
-diff -ruN a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
---- a/innobase/include/sync0rw.h	2009-01-30 06:42:20.000000000 +0900
-+++ b/innobase/include/sync0rw.h	2009-04-16 16:15:28.000000000 +0900
-@@ -325,7 +325,17 @@
- Accessor functions for rw lock. */
- UNIV_INLINE
- ulint
--rw_lock_get_waiters(
-+rw_lock_get_s_waiters(
-+/*==================*/
-+	rw_lock_t*	lock);
-+UNIV_INLINE
-+ulint
-+rw_lock_get_x_waiters(
-+/*==================*/
-+	rw_lock_t*	lock);
-+UNIV_INLINE
-+ulint
-+rw_lock_get_wx_waiters(
- /*================*/
- 	rw_lock_t*	lock);
- UNIV_INLINE
-@@ -408,6 +418,17 @@
- 	rw_lock_debug_t*	info);	/* in: debug struct */
- #endif /* UNIV_SYNC_DEBUG */
  
-+#ifdef HAVE_ATOMIC_BUILTINS
-+/* This value means NOT_LOCKED */
-+#define RW_LOCK_BIAS		0x00100000
-+#else
-+#error HAVE_ATOMIC_BUILTINS is not defined. Do you use enough new GCC or compatibles?
-+#error Or do you use exact options for CFLAGS?
-+#error e.g. (for x86_32): "-m32 -march=i586 -mtune=i686"
-+#error e.g. (for Sparc_64): "-m64 -mcpu=v9"
-+#error Otherwise, this build may be slower than normal version.
-+#endif
+diff -ruN a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
+--- a/innobase/buf/buf0buf.c	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/buf/buf0buf.c	2009-10-22 15:18:44.000000000 +0900
+@@ -1292,7 +1292,7 @@
+ 
+ 	if (mode == BUF_GET_NOWAIT) {
+ 		if (rw_latch == RW_S_LATCH) {
+-			success = rw_lock_s_lock_func_nowait(&(block->lock),
++			success = rw_lock_s_lock_nowait(&(block->lock),
+ 								file, line);
+ 			fix_type = MTR_MEMO_PAGE_S_FIX;
+ 		} else {
+@@ -1442,7 +1442,7 @@
+ 	ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
+ 
+ 	if (rw_latch == RW_S_LATCH) {
+-		success = rw_lock_s_lock_func_nowait(&(block->lock),
++		success = rw_lock_s_lock_nowait(&(block->lock),
+ 								file, line);
+ 		fix_type = MTR_MEMO_PAGE_S_FIX;
+ 	} else {
+@@ -1596,7 +1596,7 @@
+ 	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+ 
+ 	if (rw_latch == RW_S_LATCH) {
+-		success = rw_lock_s_lock_func_nowait(&(block->lock),
++		success = rw_lock_s_lock_nowait(&(block->lock),
+ 								file, line);
+ 		fix_type = MTR_MEMO_PAGE_S_FIX;
+ 	} else {
+diff -ruN a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
+--- a/innobase/include/buf0buf.ic	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/include/buf0buf.ic	2009-10-22 16:12:25.000000000 +0900
+@@ -523,7 +523,7 @@
+ #ifdef UNIV_SYNC_DEBUG	
+ 	ibool	ret;
+ 
+-	ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
++	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
+ 
+ 	ut_ad(ret == TRUE);
+ 	ut_ad(mutex_own(&block->mutex));
+diff -ruN a/innobase/include/os0sync.h b/innobase/include/os0sync.h
+--- a/innobase/include/os0sync.h	2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/os0sync.h	2009-10-22 15:18:44.000000000 +0900
+@@ -1,11 +1,35 @@
++/*****************************************************************************
 +
- /* NOTE! The structure appears here only for the compiler to know its size.
- Do not use its fields directly! The structure used in the spin lock
- implementation of a read-write lock. Several threads may have a shared lock
-@@ -417,9 +438,9 @@
- field. Then no new readers are allowed in. */
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The interface to the operating system
+ synchronization primitives.
  
- struct rw_lock_struct {
--	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+-(c) 1995 Innobase Oy
 -
--#ifdef __WIN__
-+			/* Used by sync0arr.c for thread queueing */
-+	os_event_t	s_event;	/* Used for s_lock */
-+	os_event_t	x_event;	/* Used for x_lock */
- 	os_event_t	wait_ex_event;	/* This windows specific event is
- 				used by the thread which has set the
- 				lock state to RW_LOCK_WAIT_EX. The
-@@ -427,31 +448,35 @@
- 				thread will be the next one to proceed
- 				once the current the event gets
- 				signalled. See LEMMA 2 in sync0sync.c */
+ Created 9/6/1995 Heikki Tuuri
+ *******************************************************/
 +
+ #ifndef os0sync_h
+ #define os0sync_h
+ 
+@@ -261,6 +285,23 @@
+ /*===============*/
+ 	os_fast_mutex_t*	fast_mutex);	/* in: mutex to free */
+ 	
 +#ifdef HAVE_ATOMIC_BUILTINS
-+	volatile lint	lock_word;	/* Used by using atomic builtin */
++/**************************************************************
++Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins.
++Returns true if swapped, ptr is pointer to target, old_val is value to
++compare to, new_val is the value to swap in. */
++#define os_compare_and_swap(ptr, old_val, new_val) \
++	__sync_bool_compare_and_swap(ptr, old_val, new_val)
++
++/**************************************************************
++Atomic increment for InnoDB. Currently requires GCC atomic builtins.
++Returns the resulting value, ptr is pointer to target, amount is the
++amount of increment. */
++#define os_atomic_increment(ptr, amount) \
++	__sync_add_and_fetch(ptr, amount)
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++
+ #ifndef UNIV_NONINL
+ #include "os0sync.ic"
  #endif
+diff -ruN a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
+--- a/innobase/include/sync0rw.h	2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0rw.h	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for threads, not for database transactions)
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -24,6 +47,12 @@
+ #define	RW_X_LATCH	2
+ #define	RW_NO_LATCH	3
+ 
++/* We decrement lock_word by this amount for each x_lock. It is also the
++start value for the lock_word, meaning that it limits the maximum number
++of concurrent read locks before the rw_lock breaks. The current value of
++0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
++#define X_LOCK_DECR		0x00100000
++
+ typedef struct rw_lock_struct		rw_lock_t;
+ #ifdef UNIV_SYNC_DEBUG
+ typedef struct rw_lock_debug_struct	rw_lock_debug_t;
+@@ -47,14 +76,14 @@
+ 					there may be waiters for the event */
+ #endif /* UNIV_SYNC_DEBUG */
+ 
+-extern	ulint	rw_s_system_call_count;
+-extern	ulint	rw_s_spin_wait_count;
+-extern	ulint	rw_s_exit_count;
+-extern	ulint	rw_s_os_wait_count;
+-extern	ulint	rw_x_system_call_count;
+-extern	ulint	rw_x_spin_wait_count;
+-extern	ulint	rw_x_os_wait_count;
+-extern	ulint	rw_x_exit_count;
++extern	ib_longlong	rw_s_spin_wait_count;
++extern	ib_longlong	rw_s_spin_round_count;
++extern	ib_longlong	rw_s_exit_count;
++extern	ib_longlong	rw_s_os_wait_count;
++extern	ib_longlong	rw_x_spin_wait_count;
++extern	ib_longlong	rw_x_spin_round_count;
++extern	ib_longlong	rw_x_os_wait_count;
++extern	ib_longlong	rw_x_exit_count;
+ 
+ /**********************************************************************
+ Creates, or rather, initializes an rw-lock object in a specified memory
+@@ -116,8 +145,22 @@
+ NOTE! The following macros should be used in rw s-locking, not the
+ corresponding function. */
+ 
+-#define rw_lock_s_lock_nowait(M)    rw_lock_s_lock_func_nowait(\
+-					     (M), __FILE__, __LINE__)
++#define rw_lock_s_lock_nowait(M, F, L)    rw_lock_s_lock_low(\
++					  (M), 0, (F), (L))
++/**********************************************************************
++Low-level function which tries to lock an rw-lock in s-mode. Performs no
++spinning. */
++UNIV_INLINE
++ibool
++rw_lock_s_lock_low(
++/*===============*/
++				/* out: TRUE if success */
++	rw_lock_t*	lock,	/* in: pointer to rw-lock */
++	ulint		pass __attribute__((unused)),
++				/* in: pass value; != 0, if the lock will be
++				passed to another thread to unlock */
++	const char*	file_name, /* in: file name where lock requested */
++	ulint		line);	/* in: line where requested */
+ /**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function, except if
+ you supply the file name and line number. Lock an rw-lock in shared mode
+@@ -135,18 +178,6 @@
+ 	const char*	file_name,/* in: file name where lock requested */
+ 	ulint		line);	/* in: line where requested */
+ /**********************************************************************
+-NOTE! Use the corresponding macro, not directly this function, except if
+-you supply the file name and line number. Lock an rw-lock in shared mode
+-for the current thread if the lock can be acquired immediately. */
+-UNIV_INLINE
+-ibool
+-rw_lock_s_lock_func_nowait(
+-/*=======================*/
+-				/* out: TRUE if success */
+-        rw_lock_t*   	lock,  	/* in: pointer to rw-lock */
+-	const char*	file_name,/* in: file name where lock requested */
+-	ulint		line);	/* in: line where requested */
+-/**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function! Lock an
+ rw-lock in exclusive mode for the current thread if the lock can be
+ obtained immediately. */
+@@ -338,6 +369,41 @@
+ rw_lock_get_reader_count(
+ /*=====================*/
+ 	rw_lock_t*	lock);
++/**********************************************************************
++Decrements lock_word the specified amount if it is greater than 0.
++This is used by both s_lock and x_lock operations. */
++UNIV_INLINE
++ibool
++rw_lock_lock_word_decr(
++/*===================*/
++					/* out: TRUE if decr occurs */
++	rw_lock_t*	lock,		/* in: rw-lock */
++	ulint		amount);	/* in: amount to decrement */
++/**********************************************************************
++Increments lock_word the specified amount and returns new value. */
++UNIV_INLINE
++lint
++rw_lock_lock_word_incr(
++/*===================*/
++					/* out: new value of lock_word */
++	rw_lock_t*	lock,	/* in: rw-lock */
++	ulint		amount);	/* in: amount to increment */
++/**********************************************************************
++This function sets the lock->writer_thread and lock->recursive fields.
++For platforms where we are using atomic builtins instead of lock->mutex
++it sets the lock->writer_thread field using atomics to ensure memory
++ordering. Note that it is assumed that the caller of this function
++effectively owns the lock i.e.: nobody else is allowed to modify
++lock->writer_thread at this point in time.
++The protocol is that lock->writer_thread MUST be updated BEFORE the
++lock->recursive flag is set. */
++UNIV_INLINE
++void
++rw_lock_set_writer_id_and_recursion_flag(
++/*=====================================*/
++	rw_lock_t*	lock,		/* in/out: lock to work on */
++	ibool		recursive);	/* in: TRUE if recursion
++					allowed */
+ #ifdef UNIV_SYNC_DEBUG
+ /**********************************************************************
+ Checks if the thread has locked the rw-lock in the specified mode, with
+@@ -417,47 +483,33 @@
+ field. Then no new readers are allowed in. */
  
+ struct rw_lock_struct {
++	volatile lint	lock_word;
++				/* Holds the state of the lock. */
++	volatile ulint	waiters;/* 1: there are waiters */
++	volatile ibool	recursive;/* Default value FALSE which means the lock
++				is non-recursive. The value is typically set
++				to TRUE making normal rw_locks recursive. In
++				case of asynchronous IO, when a non-zero
++				value of 'pass' is passed then we keep the
++				lock non-recursive.
++				This flag also tells us about the state of
++				writer_thread field. If this flag is set
++				then writer_thread MUST contain the thread
++				id of the current x-holder or wait-x thread.
++				This flag must be reset in x_unlock
++				functions before incrementing the lock_word */
++	volatile os_thread_id_t	writer_thread;
++				/* Thread id of writer thread. Is only
++				guaranteed to have sane and non-stale
++				value iff recursive flag is set. */
+ 	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+-
+-#ifdef __WIN__
+-	os_event_t	wait_ex_event;	/* This windows specific event is
+-				used by the thread which has set the
+-				lock state to RW_LOCK_WAIT_EX. The
+-				rw_lock design guarantees that this
+-				thread will be the next one to proceed
+-				once the current the event gets
+-				signalled. See LEMMA 2 in sync0sync.c */
+-#endif
+-
 -	ulint	reader_count;	/* Number of readers who have locked this
-+	volatile ulint	reader_count;	/* Number of readers who have locked this
- 				lock in the shared mode */
+-				lock in the shared mode */
 -	ulint	writer; 	/* This field is set to RW_LOCK_EX if there
-+	volatile ulint	writer; 	/* This field is set to RW_LOCK_EX if there
- 				is a writer owning the lock (in exclusive
- 				mode), RW_LOCK_WAIT_EX if a writer is
- 				queueing for the lock, and
- 				RW_LOCK_NOT_LOCKED, otherwise. */
+-				is a writer owning the lock (in exclusive
+-				mode), RW_LOCK_WAIT_EX if a writer is
+-				queueing for the lock, and
+-				RW_LOCK_NOT_LOCKED, otherwise. */
 -	os_thread_id_t	writer_thread;
-+	volatile os_thread_id_t	writer_thread;
- 				/* Thread id of a possible writer thread */
+-				/* Thread id of a possible writer thread */
 -	ulint	writer_count;	/* Number of times the same thread has
-+	volatile ulint	writer_count;	/* Number of times the same thread has
- 				recursively locked the lock in the exclusive
- 				mode */
+-				recursively locked the lock in the exclusive
+-				mode */
++	os_event_t	wait_ex_event;
++				/* Event for next-writer to wait on. A thread
++				must decrement lock_word before waiting. */
 +#ifndef HAVE_ATOMIC_BUILTINS
  	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
-+#endif
- 	ulint	pass; 		/* Default value 0. This is set to some
- 				value != 0 given by the caller of an x-lock
- 				operation, if the x-lock is to be passed to
- 				another thread to unlock (which happens in
- 				asynchronous i/o). */
+-	ulint	pass; 		/* Default value 0. This is set to some
+-				value != 0 given by the caller of an x-lock
+-				operation, if the x-lock is to be passed to
+-				another thread to unlock (which happens in
+-				asynchronous i/o). */
 -	ulint	waiters;	/* This ulint is set to 1 if there are
 -				waiters (readers or writers) in the global
 -				wait array, waiting for this rw_lock.
 -				Otherwise, == 0. */
 -	ibool	writer_is_wait_ex;
-+	volatile ulint	s_waiters; /* 1: there are waiters (s_lock) */
-+	volatile ulint	x_waiters; /* 1: there are waiters (x_lock) */
-+	volatile ulint	wait_ex_waiters; /* 1: there are waiters (wait_ex) */
-+	volatile ibool	writer_is_wait_ex;
- 				/* This is TRUE if the writer field is
- 				RW_LOCK_WAIT_EX; this field is located far
- 				from the memory update hotspot fields which
+-				/* This is TRUE if the writer field is
+-				RW_LOCK_WAIT_EX; this field is located far
+-				from the memory update hotspot fields which
+-				are at the start of this struct, thus we can
+-				peek this field without causing much memory
+-				bus traffic */
++#endif /* HAVE_ATOMIC_BUILTINS */
++
+ 	UT_LIST_NODE_T(rw_lock_t) list;
+ 				/* All allocated rw locks are put into a
+ 				list */
+@@ -465,15 +517,23 @@
+ 	UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
+ 				/* In the debug version: pointer to the debug
+ 				info list of the lock */
++	ulint	level;		/* Level in the global latching order. */
+ #endif /* UNIV_SYNC_DEBUG */
+-	ulint	level;		/* Level in the global latching
+-				order; default SYNC_LEVEL_NONE */
++	ulint count_os_wait;	/* Count of os_waits. May not be accurate */
+ 	const char*	cfile_name;/* File name where lock created */
+-	ulint	cline;		/* Line where created */
++        /* last s-lock file/line is not guaranteed to be correct */
+ 	const char*	last_s_file_name;/* File name where last s-locked */
+ 	const char*	last_x_file_name;/* File name where last x-locked */
+-	ulint	last_s_line;	/* Line number where last time s-locked */
+-	ulint	last_x_line;	/* Line number where last time x-locked */
++	ibool		writer_is_wait_ex;
++				/* This is TRUE if the writer field is
++				RW_LOCK_WAIT_EX; this field is located far
++				from the memory update hotspot fields which
++				are at the start of this struct, thus we can
++				peek this field without causing much memory
++				bus traffic */
++	unsigned	cline:14;	/* Line where created */
++	unsigned	last_s_line:14;	/* Line number where last time s-locked */
++	unsigned	last_x_line:14;	/* Line number where last time x-locked */
+ 	ulint	magic_n;
+ };
+ 
 diff -ruN a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
---- a/innobase/include/sync0rw.ic	2009-01-30 06:42:20.000000000 +0900
-+++ b/innobase/include/sync0rw.ic	2009-04-16 17:06:53.000000000 +0900
-@@ -47,20 +47,64 @@
- Accessor functions for rw lock. */
- UNIV_INLINE
+--- a/innobase/include/sync0rw.ic	2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0rw.ic	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for threads)
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -49,53 +72,88 @@
  ulint
--rw_lock_get_waiters(
-+rw_lock_get_s_waiters(
+ rw_lock_get_waiters(
  /*================*/
- 	rw_lock_t*	lock)
+-	rw_lock_t*	lock)
++				/* out: 1 if waiters, 0 otherwise */
++	rw_lock_t*	lock)	/* in: rw-lock */
  {
--	return(lock->waiters);
-+	return(lock->s_waiters);
+ 	return(lock->waiters);
  }
++
++/************************************************************************
++Sets lock->waiters to 1. It is not an error if lock->waiters is already
++1. On platforms where ATOMIC builtins are used this function enforces a
++memory barrier. */
  UNIV_INLINE
--void
+ void
 -rw_lock_set_waiters(
-+ulint
-+rw_lock_get_x_waiters(
- /*================*/
-+	rw_lock_t*	lock)
-+{
-+	return(lock->x_waiters);
-+}
-+UNIV_INLINE
-+ulint
-+rw_lock_get_wx_waiters(
-+/*================*/
-+	rw_lock_t*      lock)
-+{
-+	return(lock->wait_ex_waiters);
-+}
-+UNIV_INLINE
-+void
-+rw_lock_set_s_waiters(
- 	rw_lock_t*	lock,
- 	ulint		flag)
+-/*================*/
+-	rw_lock_t*	lock,
+-	ulint		flag)
++rw_lock_set_waiter_flag(
++/*====================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
  {
 -	lock->waiters = flag;
 +#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_lock_test_and_set(&lock->s_waiters, flag);
-+#else
-+	lock->s_waiters = flag;
-+#endif
-+}
-+UNIV_INLINE
-+void
-+rw_lock_set_x_waiters(
-+	rw_lock_t*	lock,
-+	ulint		flag)
-+{
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_lock_test_and_set(&lock->x_waiters, flag);
-+#else
-+	lock->x_waiters = flag;
-+#endif
-+}
-+UNIV_INLINE
++	os_compare_and_swap(&lock->waiters, 0, 1);
++#else /* HAVE_ATOMIC_BUILTINS */
++	lock->waiters = 1;
++#endif /* HAVE_ATOMIC_BUILTINS */
+ }
++
++/************************************************************************
++Resets lock->waiters to 0. It is not an error if lock->waiters is already
++0. On platforms where ATOMIC builtins are used this function enforces a
++memory barrier. */
+ UNIV_INLINE
+-ulint
+-rw_lock_get_writer(
+-/*===============*/
+-	rw_lock_t*	lock)
 +void
-+rw_lock_set_wx_waiters(
-+/*================*/
-+	rw_lock_t*      lock,
-+	ulint           flag)
-+{
++rw_lock_reset_waiter_flag(
++/*======================*/
++	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+-	return(lock->writer);
 +#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_lock_test_and_set(&lock->wait_ex_waiters, flag);
-+#else
-+	lock->wait_ex_waiters = flag;
-+#endif
++	os_compare_and_swap(&lock->waiters, 1, 0);
++#else /* HAVE_ATOMIC_BUILTINS */
++	lock->waiters = 0;
++#endif /* HAVE_ATOMIC_BUILTINS */
  }
++
++/**********************************************************************
++Returns the write-status of the lock - this function made more sense
++with the old rw_lock implementation. */
  UNIV_INLINE
- ulint
-@@ -68,7 +112,19 @@
+-void
+-rw_lock_set_writer(
++ulint
++rw_lock_get_writer(
  /*===============*/
- 	rw_lock_t*	lock)
+-	rw_lock_t*	lock,
+-	ulint		flag)
++	rw_lock_t*	lock)
  {
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (lock->writer == RW_LOCK_NOT_LOCKED) {
+-	lock->writer = flag;
++	lint lock_word = lock->lock_word;
++	if(lock_word > 0) {
++		/* return NOT_LOCKED in s-lock state, like the writer
++		member of the old lock implementation. */
 +		return(RW_LOCK_NOT_LOCKED);
-+	}
-+
-+	if (lock->writer_is_wait_ex) {
-+		return(RW_LOCK_WAIT_EX);
-+	} else {
++	} else if (((-lock_word) % X_LOCK_DECR) == 0) {
 +		return(RW_LOCK_EX);
++	} else {
++                ut_ad(lock_word > -X_LOCK_DECR);
++		return(RW_LOCK_WAIT_EX);
 +	}
-+#else
- 	return(lock->writer);
-+#endif
  }
++
++/**********************************************************************
++Returns number of readers. */
  UNIV_INLINE
- void
-@@ -96,6 +152,7 @@
+ ulint
+ rw_lock_get_reader_count(
+ /*=====================*/
+ 	rw_lock_t*	lock)
  {
- 	lock->reader_count = count;
+-	return(lock->reader_count);
+-}
+-UNIV_INLINE
+-void
+-rw_lock_set_reader_count(
+-/*=====================*/
+-	rw_lock_t*	lock,
+-	ulint		count)
+-{
+-	lock->reader_count = count;
++	lint lock_word = lock->lock_word;
++	if(lock_word > 0) {
++		/* s-locked, no x-waiters */
++		return(X_LOCK_DECR - lock_word);
++	} else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
++		/* s-locked, with x-waiters */
++		return((ulint)(-lock_word));
++	}
++	return(0);
  }
++
 +#ifndef HAVE_ATOMIC_BUILTINS
  UNIV_INLINE
  mutex_t*
  rw_lock_get_mutex(
-@@ -104,6 +161,7 @@
+@@ -104,6 +162,7 @@
  {
  	return(&(lock->mutex));
  }
@@ -217,448 +552,774 @@ diff -ruN a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
  
  /**********************************************************************
  Returns the value of writer_count for the lock. Does not reserve the lock
-@@ -133,14 +191,26 @@
+@@ -115,7 +174,126 @@
+ 				/* out: value of writer_count */
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+-	return(lock->writer_count);
++	lint lock_copy = lock->lock_word;
++	/* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
++	if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
++		return(0);
++	}
++	return(((-lock_copy) / X_LOCK_DECR) + 1);
++}
++
++/**********************************************************************
++Two different implementations for decrementing the lock_word of a rw_lock:
++one for systems supporting atomic operations, one for others. This
++does not support recursive x-locks: they should be handled by the caller and
++need not be atomic since they are performed by the current lock holder.
++Returns true if the decrement was made, false if not. */
++UNIV_INLINE
++ibool
++rw_lock_lock_word_decr(
++/*===================*/
++				/* out: TRUE if decr occurs */
++	rw_lock_t*	lock,	/* in: rw-lock */
++	ulint		amount)	/* in: amount of decrement */
++{
++
++#ifdef HAVE_ATOMIC_BUILTINS
++
++        lint local_lock_word = lock->lock_word;
++	while (local_lock_word > 0) {
++		if(os_compare_and_swap(&(lock->lock_word),
++                                       local_lock_word,
++                                       local_lock_word - amount)) {
++			return(TRUE);
++		}
++		local_lock_word = lock->lock_word;
++	}
++	return(FALSE);
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++	ibool success = FALSE;
++	mutex_enter(&(lock->mutex));
++	if(lock->lock_word > 0) {
++		lock->lock_word -= amount;
++		success = TRUE;
++	}
++	mutex_exit(&(lock->mutex));
++	return(success);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++}
++
++/**********************************************************************
++Two different implementations for incrementing the lock_word of a rw_lock:
++one for systems supporting atomic operations, one for others.
++Returns the value of lock_word after increment. */
++UNIV_INLINE
++lint
++rw_lock_lock_word_incr(
++/*===================*/
++				/* out: lock->lock_word after increment */
++	rw_lock_t*	lock,	/* in: rw-lock */
++	ulint		amount)	/* in: amount of increment */
++{
++
++#ifdef HAVE_ATOMIC_BUILTINS
++
++	return(os_atomic_increment(&(lock->lock_word), amount));
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++	lint local_lock_word;
++
++	mutex_enter(&(lock->mutex));
++
++	lock->lock_word += amount;
++	local_lock_word = lock->lock_word;
++
++	mutex_exit(&(lock->mutex));
++
++        return(local_lock_word);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
++}
++
++/**********************************************************************
++This function sets the lock->writer_thread and lock->recursive fields.
++For platforms where we are using atomic builtins instead of lock->mutex
++it sets the lock->writer_thread field using atomics to ensure memory
++ordering. Note that it is assumed that the caller of this function
++effectively owns the lock i.e.: nobody else is allowed to modify
++lock->writer_thread at this point in time.
++The protocol is that lock->writer_thread MUST be updated BEFORE the
++lock->recursive flag is set. */
++UNIV_INLINE
++void
++rw_lock_set_writer_id_and_recursion_flag(
++/*=====================================*/
++	rw_lock_t*	lock,		/* in/out: lock to work on */
++	ibool		recursive)	/* in: TRUE if recursion
++					allowed */
++{
++	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
++
++#ifdef HAVE_ATOMIC_BUILTINS
++	os_thread_id_t	local_thread;
++	ibool		success;
++
++	local_thread = lock->writer_thread;
++	success = os_compare_and_swap(&lock->writer_thread,
++				      local_thread, curr_thread);
++	ut_a(success);
++	lock->recursive = recursive;
++
++#else /* HAVE_ATOMIC_BUILTINS */
++
++	mutex_enter(&lock->mutex);
++	lock->writer_thread = curr_thread;
++	lock->recursive = recursive;
++	mutex_exit(&lock->mutex);
++
++#endif /* HAVE_ATOMIC_BUILTINS */
+ }
+ 
+ /**********************************************************************
+@@ -133,26 +311,21 @@
  	const char*	file_name, /* in: file name where lock requested */
  	ulint		line)	/* in: line where requested */
  {
 -#ifdef UNIV_SYNC_DEBUG
-+#if defined(UNIV_SYNC_DEBUG) && !defined(HAVE_ATOMIC_BUILTINS)
- 	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
- #endif /* UNIV_SYNC_DEBUG */
- 	/* Check if the writer field is free */
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) {
-+		/* try s-lock */
-+		if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
-+			/* fail */
-+			__sync_fetch_and_add(&(lock->lock_word),1);
-+			return(FALSE);	/* locking did not succeed */
-+		}
-+		/* success */
-+		__sync_fetch_and_add(&(lock->reader_count),1);
-+#else
- 	if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
- 		/* Set the shared lock by incrementing the reader count */
- 		lock->reader_count++;
-+#endif
+-	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+-#endif /* UNIV_SYNC_DEBUG */
+-	/* Check if the writer field is free */
+-
+-	if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
+-		/* Set the shared lock by incrementing the reader count */
+-		lock->reader_count++;
++	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
++	if (!rw_lock_lock_word_decr(lock, 1)) {
++		/* Locking did not succeed */
++		return(FALSE);
++	}
  
  #ifdef UNIV_SYNC_DEBUG
- 		rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
-@@ -167,11 +237,15 @@
+-		rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
+-									line);
++	rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
+ #endif
+-		lock->last_s_file_name = file_name;
+-		lock->last_s_line = line;
+-
+-		return(TRUE);	/* locking succeeded */
+-	}
++	/* These debugging values are not set safely: they may be incorrect
++        or even refer to a line that is invalid for the file name. */
++	lock->last_s_file_name = file_name;
++	lock->last_s_line = line;
+ 
+-	return(FALSE);	/* locking did not succeed */
++	return(TRUE);	/* locking succeeded */
+ }
+ 
+ /**********************************************************************
+@@ -167,11 +340,10 @@
  	const char*	file_name,	/* in: file name where requested */
  	ulint		line)		/* in: line where lock requested */
  {
 -	ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
-+	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
- 	ut_ad(rw_lock_get_reader_count(lock) == 0);
+-	ut_ad(rw_lock_get_reader_count(lock) == 0);
++	ut_ad(lock->lock_word == X_LOCK_DECR);
  	
- 	/* Set the shared lock by incrementing the reader count */
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_fetch_and_add(&(lock->reader_count),1);
-+#else
- 	lock->reader_count++;
-+#endif
+-	/* Set the shared lock by incrementing the reader count */
+-	lock->reader_count++;
++	/* Indicate there is a new reader by decrementing lock_word */
++	lock->lock_word--;
  
  	lock->last_s_file_name = file_name;
  	lock->last_s_line = line;
-@@ -199,7 +273,11 @@
+@@ -194,13 +366,11 @@
+ 	ulint		line)		/* in: line where lock requested */
+ {
+         ut_ad(rw_lock_validate(lock));
+-	ut_ad(rw_lock_get_reader_count(lock) == 0);
+-	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
++	ut_ad(lock->lock_word == X_LOCK_DECR);
  
- 	rw_lock_set_writer(lock, RW_LOCK_EX);
+-	rw_lock_set_writer(lock, RW_LOCK_EX);
++	lock->lock_word -= X_LOCK_DECR;
  	lock->writer_thread = os_thread_get_curr_id();
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_fetch_and_add(&(lock->writer_count),1);
-+#else
- 	lock->writer_count++;
-+#endif
- 	lock->pass = 0;
+-	lock->writer_count++;
+-	lock->pass = 0;
++	lock->recursive = TRUE;
  			
  	lock->last_x_file_name = file_name;
-@@ -241,15 +319,21 @@
+ 	lock->last_x_line = line;
+@@ -241,15 +411,12 @@
  	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
  #endif /* UNIV_SYNC_DEBUG */
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(rw_lock_get_mutex(lock));
-+#endif
- 
- 	if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
+-	mutex_enter(rw_lock_get_mutex(lock));
+-
+-	if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
+-		mutex_exit(rw_lock_get_mutex(lock));
++	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
++	if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
  
  		return; /* Success */
  	} else {
  		/* Did not succeed, try spin wait */
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
+-		mutex_exit(rw_lock_get_mutex(lock));
  
  		rw_lock_s_lock_spin(lock, pass, file_name, line);
  
-@@ -272,11 +356,23 @@
+@@ -259,86 +426,60 @@
+ 
+ /**********************************************************************
+ NOTE! Use the corresponding macro, not directly this function! Lock an
+-rw-lock in shared mode for the current thread if the lock can be acquired
+-immediately. */
++rw-lock in exclusive mode for the current thread if the lock can be
++obtained immediately. */
+ UNIV_INLINE
+ ibool
+-rw_lock_s_lock_func_nowait(
++rw_lock_x_lock_func_nowait(
+ /*=======================*/
+ 				/* out: TRUE if success */
+         rw_lock_t*   	lock,  	/* in: pointer to rw-lock */
+ 	const char*	file_name,/* in: file name where lock requested */
+ 	ulint		line)	/* in: line where requested */
  {
- 	ibool	success	= FALSE;
+-	ibool	success	= FALSE;
+-
+-	mutex_enter(rw_lock_get_mutex(lock));
+-
+-	if (lock->writer == RW_LOCK_NOT_LOCKED) {
+-		/* Set the shared lock by incrementing the reader count */
+-		lock->reader_count++;
++	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
  
+-#ifdef UNIV_SYNC_DEBUG
+-		rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
+-									line);
+-#endif
++	ibool success;
+ 
+-		lock->last_s_file_name = file_name;
+-		lock->last_s_line = line;
 +#ifdef HAVE_ATOMIC_BUILTINS
-+	if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
-+		/* try s-lock */
-+		if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) {
-+			/* fail */
-+			__sync_fetch_and_add(&(lock->lock_word),1);
-+			return(FALSE);	/* locking did not succeed */
-+		}
-+		/* success */
-+		__sync_fetch_and_add(&(lock->reader_count),1);
++	success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
 +#else
- 	mutex_enter(rw_lock_get_mutex(lock));
- 
- 	if (lock->writer == RW_LOCK_NOT_LOCKED) {
- 		/* Set the shared lock by incrementing the reader count */
- 		lock->reader_count++;
-+#endif
- 
- #ifdef UNIV_SYNC_DEBUG
- 		rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
-@@ -289,7 +385,9 @@
+ 		
++	success = FALSE;
++	mutex_enter(&(lock->mutex));
++	if (lock->lock_word == X_LOCK_DECR) {
++		lock->lock_word = 0;
  		success = TRUE;
  	}
++	mutex_exit(&(lock->mutex));
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_exit(rw_lock_get_mutex(lock));
-+#endif
- 
- 	return(success);
- }
-@@ -309,6 +407,54 @@
- {
- 	ibool		success		= FALSE;
- 	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (lock->reader_count == 0) {
-+		/* try to lock writer */
-+		if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
-+				== RW_LOCK_NOT_LOCKED) {
-+			/* success */
-+retry_x_lock:
-+			/* try x-lock */
-+			if(__sync_sub_and_fetch(&(lock->lock_word),
-+					RW_LOCK_BIAS) == 0) {
-+				/* success */
-+				lock->writer_thread = curr_thread;
-+				lock->pass = 0;
-+				lock->writer_is_wait_ex = FALSE;
-+				/* next function may work as memory barrier */
-+			relock:
-+				__sync_fetch_and_add(&(lock->writer_count),1);
-+
-+#ifdef UNIV_SYNC_DEBUG
-+				rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+-	mutex_exit(rw_lock_get_mutex(lock));
+-
+-	return(success);
+-}
 +#endif
-+
-+				lock->last_x_file_name = file_name;
-+				lock->last_x_line = line;
-+
-+				ut_ad(rw_lock_validate(lock));
-+
-+				return(TRUE);
-+			} else {
-+				/* fail (x-lock) */
-+				if (__sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS)
-+						== 0)
-+					goto retry_x_lock;
-+			}
-+
-+			__sync_lock_test_and_set(&(lock->writer),RW_LOCK_NOT_LOCKED);
-+		}
-+	}
-+
-+	if (lock->pass == 0
-+			&& os_thread_eq(lock->writer_thread, curr_thread)) {
-+		goto relock;
++	if (success) {
++		rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+ 
+-/**********************************************************************
+-NOTE! Use the corresponding macro, not directly this function! Lock an
+-rw-lock in exclusive mode for the current thread if the lock can be
+-obtained immediately. */
+-UNIV_INLINE
+-ibool
+-rw_lock_x_lock_func_nowait(
+-/*=======================*/
+-				/* out: TRUE if success */
+-        rw_lock_t*   	lock,  	/* in: pointer to rw-lock */
+-	const char*	file_name,/* in: file name where lock requested */
+-	ulint		line)	/* in: line where requested */
+-{
+-	ibool		success		= FALSE;
+-	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
+-	mutex_enter(rw_lock_get_mutex(lock));
++	} else if (lock->recursive
++		   && os_thread_eq(lock->writer_thread, curr_thread)) {
++		/* Relock: this lock_word modification is safe since no other
++		threads can modify (lock, unlock, or reserve) lock_word while
++		there is an exclusive writer and this is the writer thread. */
++		lock->lock_word -= X_LOCK_DECR;
+ 
+-	if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
+-	} else if (UNIV_LIKELY(rw_lock_get_writer(lock)
+-			       == RW_LOCK_NOT_LOCKED)) {
+-		rw_lock_set_writer(lock, RW_LOCK_EX);
+-		lock->writer_thread = curr_thread;
+-		lock->pass = 0;
+-	relock:
+-		lock->writer_count++;
++		ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+ 			
++	} else {
++		/* Failure */
++		return(FALSE);
 +	}
-+
-+	//ut_ad(rw_lock_validate(lock));
-+
-+	return(FALSE);
-+#else
- 	mutex_enter(rw_lock_get_mutex(lock));
+ #ifdef UNIV_SYNC_DEBUG
+-		rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
++	rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+ #endif
+ 
+-		lock->last_x_file_name = file_name;
+-		lock->last_x_line = line;
+-
+-		success = TRUE;
+-	} else if (rw_lock_get_writer(lock) == RW_LOCK_EX
+-			&& lock->pass == 0
+-			&& os_thread_eq(lock->writer_thread, curr_thread)) {
+-		goto relock;
+-	}
+-
+-	mutex_exit(rw_lock_get_mutex(lock));
++	lock->last_x_file_name = file_name;
++	lock->last_x_line = line;
  
- 	if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
-@@ -339,6 +485,7 @@
          ut_ad(rw_lock_validate(lock));
  
- 	return(success);
-+#endif
+-	return(success);
++	return(TRUE);
  }
  
  /**********************************************************************
-@@ -354,16 +501,33 @@
+@@ -354,39 +495,21 @@
  #endif
  	)
  {
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_t*	mutex	= &(lock->mutex);
+-	mutex_t*	mutex	= &(lock->mutex);
 -	ibool		sg 	= FALSE;
-+#endif
-+	ibool		x_sg 	= FALSE;
-+	ibool		wx_sg	= FALSE;
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	ibool		last	= FALSE;
-+#endif
- 
-+#ifndef HAVE_ATOMIC_BUILTINS
-         /* Acquire the mutex protecting the rw-lock fields */
- 	mutex_enter(mutex);
-+#endif
- 
- 	/* Reset the shared lock by decrementing the reader count */
- 
- 	ut_a(lock->reader_count > 0);
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	/* unlock lock_word */
-+	__sync_fetch_and_add(&(lock->lock_word),1);
-+
-+	if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) {
-+		last = TRUE;
-+	}
-+#else
- 	lock->reader_count--;
-+#endif
+-
+-        /* Acquire the mutex protecting the rw-lock fields */
+-	mutex_enter(mutex);
+-
+-	/* Reset the shared lock by decrementing the reader count */
+-
+-	ut_a(lock->reader_count > 0);
+-	lock->reader_count--;
++	ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
  
  #ifdef UNIV_SYNC_DEBUG
  	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-@@ -372,22 +536,39 @@
- 	/* If there may be waiters and this was the last s-lock,
- 	signal the object */
+ #endif
+ 	
+-	/* If there may be waiters and this was the last s-lock,
+-	signal the object */
++	/* Increment lock_word to indicate 1 less reader */
++	if (rw_lock_lock_word_incr(lock, 1) == 0) {
  
 -	if (UNIV_UNLIKELY(lock->waiters)
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0))) {
-+		os_event_set(lock->wait_ex_event);
-+		sync_array_object_signalled(sync_primary_wait_array);
-+	}
-+	else if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->x_waiters, 0))) {
-+		os_event_set(lock->x_event);
-+		sync_array_object_signalled(sync_primary_wait_array);
-+	}
-+#else
-+	if (UNIV_UNLIKELY(lock->wait_ex_waiters)
- 			&& lock->reader_count == 0) {
+-			&& lock->reader_count == 0) {
 -	       	sg = TRUE;
-+	       	wx_sg = TRUE;
- 
+-
 -		rw_lock_set_waiters(lock, 0);
-+		rw_lock_set_wx_waiters(lock, 0);
-+	}
-+	else if (UNIV_UNLIKELY(lock->x_waiters)
-+			&& lock->reader_count == 0) {
-+		x_sg = TRUE;
-+
-+		rw_lock_set_x_waiters(lock, 0);
- 	}
- 	
- 	mutex_exit(mutex);
- 
+-	}
+-	
+-	mutex_exit(mutex);
+-
 -	if (UNIV_UNLIKELY(sg)) {
 -#ifdef __WIN__
-+	if (UNIV_UNLIKELY(wx_sg)) {
++		/* wait_ex waiter exists. It may not be asleep, but we signal
++                anyway. We do not wake other waiters, because they can't
++                exist without wait_ex waiter and wait_ex waiter goes first.*/
  		os_event_set(lock->wait_ex_event);
 -#endif
 -		os_event_set(lock->event);
-+		sync_array_object_signalled(sync_primary_wait_array);
-+	} else if (UNIV_UNLIKELY(x_sg)) {
-+		os_event_set(lock->x_event);
  		sync_array_object_signalled(sync_primary_wait_array);
++
  	}
-+#endif
  
          ut_ad(rw_lock_validate(lock));
- 
-@@ -409,13 +590,22 @@
- 
- 	ut_ad(lock->reader_count > 0);
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_sub_and_fetch(&(lock->reader_count),1);
-+#else
- 	lock->reader_count--;
-+#endif
+@@ -405,16 +528,15 @@
+ /*====================*/
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+-	/* Reset the shared lock by decrementing the reader count */
+-
+-	ut_ad(lock->reader_count > 0);
+-
+-	lock->reader_count--;
++	ut_ad(lock->lock_word < X_LOCK_DECR);
  
  #ifdef UNIV_SYNC_DEBUG
  	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
  #endif
  
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	ut_ad(!lock->s_waiters);
-+	ut_ad(!lock->x_waiters);
-+#else
++	/* Decrease reader count by incrementing lock_word */
++	lock->lock_word++;
++
  	ut_ad(!lock->waiters);
-+#endif
          ut_ad(rw_lock_validate(lock));
  #ifdef UNIV_SYNC_PERF_STAT
- 	rw_s_exit_count++;
-@@ -435,41 +625,83 @@
+@@ -435,42 +557,32 @@
  #endif
  	)
  {
 -	ibool	sg 	= FALSE;
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	ibool	last	= FALSE;
-+#endif
-+	ibool	s_sg	= FALSE;
-+	ibool	x_sg	= FALSE;
- 
-+#ifndef HAVE_ATOMIC_BUILTINS
-         /* Acquire the mutex protecting the rw-lock fields */
- 	mutex_enter(&(lock->mutex));
-+#endif
- 
- 	/* Reset the exclusive lock if this thread no longer has an x-mode
- 	lock */
- 
- 	ut_ad(lock->writer_count > 0);
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
-+		last = TRUE;
-+	}
-+
-+	if (last) {
-+		/* unlock lock_word */
-+		__sync_fetch_and_add(&(lock->lock_word),RW_LOCK_BIAS);
-+
-+		/* FIXME: It is a value of bad manners for pthread.
-+		          But we shouldn't keep an ID of not-owner. */
-+		lock->writer_thread = -1;
-+		__sync_lock_test_and_set(&(lock->writer),RW_LOCK_NOT_LOCKED);
-+	}
-+#else
- 	lock->writer_count--;
+-
+-        /* Acquire the mutex protecting the rw-lock fields */
+-	mutex_enter(&(lock->mutex));
+-
+-	/* Reset the exclusive lock if this thread no longer has an x-mode
+-	lock */
+-
+-	ut_ad(lock->writer_count > 0);
++	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
  
- 	if (lock->writer_count == 0) {
- 		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+-	lock->writer_count--;
+-
+-	if (lock->writer_count == 0) {
+-		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
++	/* lock->recursive flag also indicates if lock->writer_thread is
++	valid or stale. If we are the last of the recursive callers
++	then we must unset lock->recursive flag to indicate that the
++	lock->writer_thread is now stale.
++	Note that since we still hold the x-lock we can safely read the
++	lock_word. */
++	if (lock->lock_word == 0) {
++		/* Last caller in a possible recursive chain. */
++		lock->recursive = FALSE;
  	}
-+#endif
  
  #ifdef UNIV_SYNC_DEBUG
  	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
  #endif
  	
- 	/* If there may be waiters, signal the lock */
+-	/* If there may be waiters, signal the lock */
 -	if (UNIV_UNLIKELY(lock->waiters)
 -			&& lock->writer_count == 0) {
 -
 -	       	sg = TRUE;
 -		rw_lock_set_waiters(lock, 0);
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (last) {
-+		if(__sync_lock_test_and_set(&lock->s_waiters, 0)){
-+			s_sg = TRUE;
-+		}
-+		if(__sync_lock_test_and_set(&lock->x_waiters, 0)){
-+			x_sg = TRUE;
-+		}
-+	}
-+#else
-+	if (lock->writer_count == 0) {
-+		if(lock->s_waiters){
-+			s_sg = TRUE;
-+			rw_lock_set_s_waiters(lock, 0);
-+		}
-+		if(lock->x_waiters){
-+			x_sg = TRUE;
-+			rw_lock_set_x_waiters(lock, 0);
-+		}
- 	}
- 	
- 	mutex_exit(&(lock->mutex));
-+#endif
- 
+-	}
+-	
+-	mutex_exit(&(lock->mutex));
+-
 -	if (UNIV_UNLIKELY(sg)) {
-+	if (UNIV_UNLIKELY(s_sg)) {
-+		os_event_set(lock->s_event);
-+		sync_array_object_signalled(sync_primary_wait_array);
-+	}
-+	if (UNIV_UNLIKELY(x_sg)) {
- #ifdef __WIN__
-+		/* I doubt the necessity of it. */
- 		os_event_set(lock->wait_ex_event);
- #endif
+-#ifdef __WIN__
+-		os_event_set(lock->wait_ex_event);
+-#endif
 -		os_event_set(lock->event);
-+		os_event_set(lock->x_event);
- 		sync_array_object_signalled(sync_primary_wait_array);
+-		sync_array_object_signalled(sync_primary_wait_array);
++	if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
++		/* Lock is now free. May have to signal read/write waiters.
++                We do not need to signal wait_ex waiters, since they cannot
++                exist when there is a writer. */
++		if (lock->waiters) {
++			rw_lock_reset_waiter_flag(lock);
++			os_event_set(lock->event);
++			sync_array_object_signalled(sync_primary_wait_array);
++		}
  	}
  
-@@ -494,9 +726,13 @@
- 
- 	ut_ad(lock->writer_count > 0);
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) {
-+#else
- 	lock->writer_count--;
+         ut_ad(rw_lock_validate(lock));
+@@ -492,18 +604,18 @@
+ 	/* Reset the exclusive lock if this thread no longer has an x-mode
+ 	lock */
  
- 	if (lock->writer_count == 0) {
-+#endif
- 		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
- 	}
+-	ut_ad(lock->writer_count > 0);
+-
+-	lock->writer_count--;
+-
+-	if (lock->writer_count == 0) {
+-		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+-	}
++	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
  
-@@ -504,7 +740,12 @@
+ #ifdef UNIV_SYNC_DEBUG
  	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
  #endif
  
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	ut_ad(!lock->s_waiters);
-+	ut_ad(!lock->x_waiters);
-+#else
++	if (lock->lock_word == 0) {
++		lock->recursive = FALSE;
++	}
++
++	lock->lock_word += X_LOCK_DECR;
++
  	ut_ad(!lock->waiters);
-+#endif
          ut_ad(rw_lock_validate(lock));
  
- #ifdef UNIV_SYNC_PERF_STAT
+diff -ruN a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
+--- a/innobase/include/sync0sync.h	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/include/sync0sync.h	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -465,8 +488,11 @@
+ struct mutex_struct {
+ 	os_event_t	event;	/* Used by sync0arr.c for the wait queue */
+ 	ulint	lock_word;	/* This ulint is the target of the atomic
+-				test-and-set instruction in Win32 */
+-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
++				test-and-set instruction in Win32 and
++				x86 32/64 with GCC 4.1.0 or later version */
++#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
++#elif defined(HAVE_ATOMIC_BUILTINS)
++#else
+ 	os_fast_mutex_t
+ 		os_fast_mutex;	/* In other systems we use this OS mutex
+ 				in place of lock_word */
+@@ -525,8 +551,7 @@
+ /* The number of system calls made in this module. Intended for performance
+ monitoring. */
+ 
+-extern 	ulint	mutex_system_call_count;
+-extern	ulint	mutex_exit_count;
++extern	ib_longlong	mutex_exit_count;
+ 
+ /* Latching order checks start when this is set TRUE */
+ extern ibool	sync_order_checks_on;
+diff -ruN a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
+--- a/innobase/include/sync0sync.ic	2009-09-10 04:02:59.000000000 +0900
++++ b/innobase/include/sync0sync.ic	2009-10-22 15:18:44.000000000 +0900
+@@ -1,21 +1,34 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+ 
+-#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+-/* %z0: Use the size of operand %0 which in our case is *m to determine
+-instruction size, it should end up as xchgl. "1" in the input constraint,
+-says that "in" has to go in the same place as "out".*/
+-#define TAS(m, in, out) \
+-	asm volatile ("xchg%z0 %2, %0" \
+-	: "=g" (*(m)), "=r" (out) \
+-	: "1" (in))	/* Note: "1" here refers to "=r" (out) */
+-#endif
+-
+ /**********************************************************************
+ Sets the waiters field in a mutex. */
+ 
+@@ -94,12 +107,8 @@
+ 	/* mutex_fence(); */
+ 
+ 	return(res);
+-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+-	ulint	res;
+-
+-	TAS(&mutex->lock_word, 1, res);
+-
+-	return(res);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++	return __sync_lock_test_and_set(&(mutex->lock_word), 1);
+ #else
+ 	ibool	ret;
+ 
+@@ -136,10 +145,11 @@
+ 	__asm   MOV     EDX, 0
+         __asm   MOV     ECX, lw
+         __asm   XCHG    EDX, DWORD PTR [ECX]                    
+-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
+-	ulint	res;
+-
+-	TAS(&mutex->lock_word, 0, res);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++	/* In theory __sync_lock_release should be used to release the lock.
++	Unfortunately, it does not work properly alone. The workaround is
++	that more conservative __sync_lock_test_and_set is used instead. */
++	__sync_lock_test_and_set(&(mutex->lock_word), 0);
+ #else
+ 	mutex->lock_word = 0;
+ 
+diff -ruN a/innobase/row/row0sel.c b/innobase/row/row0sel.c
+--- a/innobase/row/row0sel.c	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/row/row0sel.c	2009-10-22 15:18:44.000000000 +0900
+@@ -1178,7 +1178,7 @@
+ 			rw_lock_s_lock(&btr_search_latch);
+ 
+ 			search_latch_locked = TRUE;
+-		} else if (btr_search_latch.writer_is_wait_ex) {
++		} else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
+ 
+ 			/* There is an x-latch request waiting: release the
+ 			s-latch for a moment; as an s-latch here is often
+@@ -3123,7 +3123,7 @@
+ 	/* PHASE 0: Release a possible s-latch we are holding on the
+ 	adaptive hash index latch if there is someone waiting behind */
+ 
+-	if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
++	if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
+ 	    && trx->has_search_latch) {
+ 
+ 		/* There is an x-latch request on the adaptive hash index:
 diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
---- a/innobase/sync/sync0arr.c	2009-01-30 06:42:24.000000000 +0900
-+++ b/innobase/sync/sync0arr.c	2009-04-16 16:15:28.000000000 +0900
-@@ -309,13 +309,13 @@
+--- a/innobase/sync/sync0arr.c	2009-09-10 04:03:01.000000000 +0900
++++ b/innobase/sync/sync0arr.c	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The wait array used in synchronization primitives
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -297,25 +320,21 @@
+ }
+ 
+ /***********************************************************************
+-Puts the cell event in reset state. */
++Returns the event that the thread owning the cell waits for. */
+ static
+-ib_longlong
+-sync_cell_event_reset(
+-/*==================*/
+-				/* out: value of signal_count
+-				at the time of reset. */
+-	ulint		type,	/* in: lock type mutex/rw_lock */
+-	void*		object) /* in: the rw_lock/mutex object */
++os_event_t
++sync_cell_get_event(
++/*================*/
++	sync_cell_t*	cell) /* in: non-empty sync array cell */
  {
++	ulint type = cell->request_type;
++
  	if (type == SYNC_MUTEX) {
- 		return(os_event_reset(((mutex_t *) object)->event));
+-		return(os_event_reset(((mutex_t *) object)->event));
 -#ifdef __WIN__
++		return(((mutex_t *) cell->wait_object)->event);
  	} else if (type == RW_LOCK_WAIT_EX) {
- 		return(os_event_reset(
- 		       ((rw_lock_t *) object)->wait_ex_event));
+-		return(os_event_reset(
+-		       ((rw_lock_t *) object)->wait_ex_event));
 -#endif
 -	} else {
 -		return(os_event_reset(((rw_lock_t *) object)->event));
-+	} else if (type == RW_LOCK_SHARED) {
-+		return(os_event_reset(((rw_lock_t *) object)->s_event));
-+	} else { /* RW_LOCK_EX */
-+		return(os_event_reset(((rw_lock_t *) object)->x_event));
++		return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
++	} else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
++		return(((rw_lock_t *) cell->wait_object)->event);
  	}
  }		
  
-@@ -415,15 +415,12 @@
- 
- 	if (cell->request_type == SYNC_MUTEX) {
- 		event = ((mutex_t*) cell->wait_object)->event;
+@@ -334,6 +353,7 @@
+         ulint*   	index)  /* out: index of the reserved cell */
+ {
+         sync_cell_t*   	cell;
++	os_event_t      event;
+         ulint           i;
+         
+         ut_a(object);
+@@ -372,8 +392,8 @@
+ 			/* Make sure the event is reset and also store
+ 			the value of signal_count at which the event
+ 			was reset. */
+-			cell->signal_count = sync_cell_event_reset(type,
+-								object);
++                        event = sync_cell_get_event(cell);
++			cell->signal_count = os_event_reset(event);
+ 
+ 			cell->reservation_time = time(NULL);
+ 
+@@ -413,19 +433,7 @@
+ 	ut_a(!cell->waiting);
+ 	ut_ad(os_thread_get_curr_id() == cell->thread);
+ 
+-	if (cell->request_type == SYNC_MUTEX) {
+-		event = ((mutex_t*) cell->wait_object)->event;
 -#ifdef __WIN__
 -	/* On windows if the thread about to wait is the one which
 -	has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
 -	it waits on a special event i.e.: wait_ex_event. */
- 	} else if (cell->request_type == RW_LOCK_WAIT_EX) {
- 		event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
+-	} else if (cell->request_type == RW_LOCK_WAIT_EX) {
+-		event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
 -#endif
 -	} else {	
 -		event = ((rw_lock_t*) cell->wait_object)->event;
-+	} else if (cell->request_type == RW_LOCK_SHARED) {
-+		event = ((rw_lock_t*) cell->wait_object)->s_event;
-+	} else {
-+		event = ((rw_lock_t*) cell->wait_object)->x_event;
- 	}
- 
+-	}
+-
++	event = sync_cell_get_event(cell);
         	cell->waiting = TRUE;
-@@ -464,6 +461,7 @@
+ 
+ #ifdef UNIV_SYNC_DEBUG
+@@ -464,6 +472,7 @@
  	mutex_t*	mutex;
  	rw_lock_t*	rwlock;
  	ulint		type;
@@ -666,7 +1327,7 @@ diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
  
  	type = cell->request_type;
  
-@@ -492,12 +490,10 @@
+@@ -492,9 +501,7 @@
  			(ulong) mutex->waiters);
  
  	} else if (type == RW_LOCK_EX
@@ -675,12 +1336,8 @@ diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
 -#endif
  		   || type == RW_LOCK_SHARED) {
  
--		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
-+		fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
- 
- 		rwlock = cell->old_wait_rw_lock;
- 
-@@ -505,21 +501,23 @@
+ 		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
+@@ -505,21 +512,24 @@
  			" RW-latch at %p created in file %s line %lu\n",
  			rwlock, rwlock->cfile_name,
  			(ulong) rwlock->cline);
@@ -698,22 +1355,63 @@ diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
  		
  		fprintf(file,
 -			"number of readers %lu, waiters flag %lu\n"
-+			"number of readers %lu, s_waiters flag %lu, x_waiters flag %lu\n"
++			"number of readers %lu, waiters flag %lu, "
++                        "lock_word: %lx\n"
  			"Last time read locked in file %s line %lu\n"
  			"Last time write locked in file %s line %lu\n",
- 			(ulong) rwlock->reader_count,
--			(ulong) rwlock->waiters,
-+			(ulong) rwlock->s_waiters,
-+			(ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
+-			(ulong) rwlock->reader_count,
++			(ulong) rw_lock_get_reader_count(rwlock),
+ 			(ulong) rwlock->waiters,
++			rwlock->lock_word,
  			rwlock->last_s_file_name,
  			(ulong) rwlock->last_s_line,
  			rwlock->last_x_file_name,
-@@ -839,11 +837,15 @@
+@@ -773,28 +783,30 @@
+ 			return(TRUE);
+ 		}
+ 
+-	} else if (cell->request_type == RW_LOCK_EX
+-		   || cell->request_type == RW_LOCK_WAIT_EX) {
++	} else if (cell->request_type == RW_LOCK_EX) {
+ 
+ 	    	lock = cell->wait_object;
+ 
+-	    	if (rw_lock_get_reader_count(lock) == 0
+-		    && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++		if (lock->lock_word > 0) {
++		/* Either unlocked or only read locked. */
+ 
+ 			return(TRUE);
+ 		}
+ 
+-	    	if (rw_lock_get_reader_count(lock) == 0
+-		    && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
+-		    && os_thread_eq(lock->writer_thread, cell->thread)) {
++        } else if (cell->request_type == RW_LOCK_WAIT_EX) {
++
++		lock = cell->wait_object;
++
++                /* lock_word == 0 means all readers have left */
++		if (lock->lock_word == 0) {
+ 
+ 			return(TRUE);
+ 		}
+-
+ 	} else if (cell->request_type == RW_LOCK_SHARED) {
+ 	    	lock = cell->wait_object;
+ 
+-		if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++                /* lock_word > 0 means no writer or reserved writer */
++		if (lock->lock_word > 0) {
+ 		
+ 			return(TRUE);
+ 		}
+@@ -839,11 +851,15 @@
  /*========================*/
  	sync_array_t*	arr)	/* in: wait array */
  {
 +#ifdef HAVE_ATOMIC_BUILTINS
-+	__sync_fetch_and_add(&(arr->sg_count),1);
++	(void) os_atomic_increment(&arr->sg_count, 1);
 +#else
          sync_array_enter(arr);
  
@@ -724,38 +1422,221 @@ diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
  }
  
  /**************************************************************************
-@@ -880,19 +882,23 @@
+@@ -859,6 +875,7 @@
+         sync_cell_t*   	cell;
+         ulint           count;
+         ulint           i;
++	os_event_t      event;
  
- 					mutex = cell->wait_object;
- 					os_event_set(mutex->event);
--#ifdef __WIN__
- 				} else if (cell->request_type
- 					   == RW_LOCK_WAIT_EX) {
- 					rw_lock_t*	lock;
+         sync_array_enter(arr);
+ 
+@@ -868,36 +885,20 @@
+         while (count < arr->n_reserved) {
+ 
+         	cell = sync_array_get_nth_cell(arr, i);
++		i++;
+ 
+-                if (cell->wait_object != NULL) {
+-
++		if (cell->wait_object == NULL) {
++			continue;
++		}
+                         count++;
  
- 					lock = cell->wait_object;
- 					os_event_set(lock->wait_ex_event);
+                         if (sync_arr_cell_can_wake_up(cell)) {
+ 
+-				if (cell->request_type == SYNC_MUTEX) {
+-					mutex_t*	mutex;
++			event = sync_cell_get_event(cell);
+ 
+-					mutex = cell->wait_object;
+-					os_event_set(mutex->event);
+-#ifdef __WIN__
+-				} else if (cell->request_type
+-					   == RW_LOCK_WAIT_EX) {
+-					rw_lock_t*	lock;
+-
+-					lock = cell->wait_object;
+-					os_event_set(lock->wait_ex_event);
 -#endif
 -				} else {
-+				} else if (cell->request_type
-+					   == RW_LOCK_SHARED) {
- 					rw_lock_t*	lock;
- 
- 					lock = cell->wait_object;
+-					rw_lock_t*	lock;
+-
+-					lock = cell->wait_object;
 -					os_event_set(lock->event);
-+					os_event_set(lock->s_event);
-+				} else {
-+					rw_lock_t*      lock;
-+
-+					lock = cell->wait_object;
-+					os_event_set(lock->x_event);
- 				}
-                         }
+-				}
+-                        }
++			os_event_set(event);
                  }
+ 
+-                i++;
+         }
+ 
+         sync_array_exit(arr);
+@@ -1014,4 +1015,3 @@
+         
+         sync_array_exit(arr);
+ }
+-
 diff -ruN a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
---- a/innobase/sync/sync0rw.c	2009-01-30 06:42:24.000000000 +0900
-+++ b/innobase/sync/sync0rw.c	2009-04-16 17:33:59.000000000 +0900
-@@ -99,6 +99,7 @@
+--- a/innobase/sync/sync0rw.c	2009-09-10 04:03:01.000000000 +0900
++++ b/innobase/sync/sync0rw.c	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ The read-write lock (for thread synchronization)
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/11/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -15,17 +38,110 @@
+ #include "mem0mem.h"
+ #include "srv0srv.h"
+ 
+-ulint	rw_s_system_call_count	= 0;
+-ulint	rw_s_spin_wait_count	= 0;
+-ulint	rw_s_os_wait_count	= 0;
++/*
++	IMPLEMENTATION OF THE RW_LOCK
++	=============================
++The status of a rw_lock is held in lock_word. The initial value of lock_word is
++X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
++for each x-lock. This describes the lock state for each value of lock_word:
++
++lock_word == X_LOCK_DECR:      Unlocked.
++0 < lock_word < X_LOCK_DECR:   Read locked, no waiting writers.
++			       (X_LOCK_DECR - lock_word) is the
++			       number of readers that hold the lock.
++lock_word == 0:		       Write locked
++-X_LOCK_DECR < lock_word < 0:  Read locked, with a waiting writer.
++			       (-lock_word) is the number of readers
++			       that hold the lock.
++lock_word <= -X_LOCK_DECR:     Recursively write locked. lock_word has been
++			       decremented by X_LOCK_DECR once for each lock,
++			       so the number of locks is:
++			       ((-lock_word) / X_LOCK_DECR) + 1
++When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
++other values of lock_word are invalid.
++
++The lock_word is always read and updated atomically and consistently, so that
++it always represents the state of the lock, and the state of the lock changes
++with a single atomic operation. This lock_word holds all of the information
++that a thread needs in order to determine if it is eligible to gain the lock
++or if it must spin or sleep. The one exception to this is that writer_thread
++must be verified before recursive write locks: to solve this scenario, we make
++writer_thread readable by all threads, but only writeable by the x-lock holder.
++
++The other members of the lock obey the following rules to remain consistent:
++
++recursive:	This and the writer_thread field together control the
++		behaviour of recursive x-locking.
++		lock->recursive must be FALSE in following states:
++			1) The writer_thread contains garbage i.e.: the
++			lock has just been initialized.
++			2) The lock is not x-held and there is no
++			x-waiter waiting on WAIT_EX event.
++			3) The lock is x-held or there is an x-waiter
++			waiting on WAIT_EX event but the 'pass' value
++			is non-zero.
++		lock->recursive is TRUE iff:
++			1) The lock is x-held or there is an x-waiter
++			waiting on WAIT_EX event and the 'pass' value
++			is zero.
++		This flag must be set after the writer_thread field
++		has been updated with a memory ordering barrier.
++		It is unset before the lock_word has been incremented.
++writer_thread:	Is used only in recursive x-locking. Can only be safely
++		read iff lock->recursive flag is TRUE.
++		This field is uninitialized at lock creation time and
++		is updated atomically when x-lock is acquired or when
++		move_ownership is called. A thread is only allowed to
++		set the value of this field to its thread_id i.e.: a
++		thread cannot set writer_thread to some other thread's
++		id.
++waiters:	May be set to 1 anytime, but to avoid unnecessary wake-up
++		signals, it should only be set to 1 when there are threads
++		waiting on event. Must be 1 when a writer starts waiting to
++		ensure the current x-locking thread sends a wake-up signal
++		during unlock. May only be reset to 0 immediately before a
++		wake-up signal is sent to event. On most platforms, a
++		memory barrier is required after waiters is set, and before
++		verifying lock_word is still held, to ensure some unlocker
++		really does see the flag's new value.
++event:		Threads wait on event for read or writer lock when another
++		thread has an x-lock or an x-lock reservation (wait_ex). A
++		thread may only	wait on event after performing the following
++		actions in order:
++		   (1) Record the counter value of event (with os_event_reset).
++		   (2) Set waiters to 1.
++		   (3) Verify lock_word <= 0.
++		(1) must come before (2) to ensure signal is not missed.
++		(2) must come before (3) to ensure a signal is sent.
++		These restrictions force the above ordering.
++		Immediately before sending the wake-up signal, we should:
++		   (1) Verify lock_word == X_LOCK_DECR (unlocked)
++		   (2) Reset waiters to 0.
++wait_ex_event:	A thread may only wait on the wait_ex_event after it has
++		performed the following actions in order:
++		   (1) Decrement lock_word by X_LOCK_DECR.
++		   (2) Record counter value of wait_ex_event (os_event_reset,
++                       called from sync_array_reserve_cell).
++		   (3) Verify that lock_word < 0.
++		(1) must come first to ensure no other threads become reader
++                or next writer, and notifies unlocker that signal must be sent.
++                (2) must come before (3) to ensure the signal is not missed.
++		These restrictions force the above ordering.
++		Immediately before sending the wake-up signal, we should:
++		   Verify lock_word == 0 (waiting thread holds x_lock)
++*/
++
++ib_longlong	rw_s_spin_wait_count	= 0;
++ib_longlong	rw_s_spin_round_count	= 0;
++ib_longlong	rw_s_os_wait_count	= 0;
++
++ib_longlong	rw_s_exit_count		= 0;
++
++ib_longlong	rw_x_spin_wait_count	= 0;
++ib_longlong	rw_x_spin_round_count	= 0;
++ib_longlong	rw_x_os_wait_count	= 0;
+ 
+-ulint	rw_s_exit_count		= 0;
+-
+-ulint	rw_x_system_call_count	= 0;
+-ulint	rw_x_spin_wait_count	= 0;
+-ulint	rw_x_os_wait_count	= 0;
+-
+-ulint	rw_x_exit_count		= 0;
++ib_longlong	rw_x_exit_count		= 0;
+ 
+ /* The global list of rw-locks */
+ rw_lock_list_t	rw_lock_list;
+@@ -99,22 +215,30 @@
  	object is created, then the following call initializes
  	the sync system. */
  
@@ -763,490 +1644,588 @@ diff -ruN a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
  	mutex_create(rw_lock_get_mutex(lock));
  	mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
  
-@@ -108,8 +109,14 @@
+ 	lock->mutex.cfile_name = cfile_name;
+ 	lock->mutex.cline = cline;
+-#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
++# if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
  	lock->mutex.cmutex_name = cmutex_name;
  	lock->mutex.mutex_type = 1;
- #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-+#endif /* !HAVE_ATOMIC_BUILTINS */
+-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
++# endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
    
 -	rw_lock_set_waiters(lock, 0);
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	lock->lock_word = RW_LOCK_BIAS;
-+#endif
-+	rw_lock_set_s_waiters(lock, 0);
-+	rw_lock_set_x_waiters(lock, 0);
-+	rw_lock_set_wx_waiters(lock, 0);
- 	rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
- 	lock->writer_count = 0;
- 	rw_lock_set_reader_count(lock, 0);
-@@ -130,11 +137,9 @@
+-	rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
+-	lock->writer_count = 0;
+-	rw_lock_set_reader_count(lock, 0);
+-
+-	lock->writer_is_wait_ex = FALSE;
++#else /* HAVE_ATOMIC_BUILTINS */
++# ifdef UNIV_DEBUG
++	UT_NOT_USED(cmutex_name);
++# endif
++#endif /* HAVE_ATOMIC_BUILTINS */
++
++	lock->lock_word = X_LOCK_DECR;
++	lock->waiters = 0;
++
++	/* We set this value to signify that lock->writer_thread
++	contains garbage at initialization and cannot be used for
++	recursive x-locking. */
++	lock->recursive = FALSE;
+ 
+ #ifdef UNIV_SYNC_DEBUG
+ 	UT_LIST_INIT(lock->debug_list);
+@@ -126,15 +250,13 @@
+ 	lock->cfile_name = cfile_name;
+ 	lock->cline = cline;
+ 
++	lock->count_os_wait = 0;
+ 	lock->last_s_file_name = "not yet reserved";
  	lock->last_x_file_name = "not yet reserved";
  	lock->last_s_line = 0;
  	lock->last_x_line = 0;
--	lock->event = os_event_create(NULL);
+ 	lock->event = os_event_create(NULL);
 -
 -#ifdef __WIN__
-+	lock->s_event = os_event_create(NULL);
-+	lock->x_event = os_event_create(NULL);
  	lock->wait_ex_event = os_event_create(NULL);
 -#endif
  
  	mutex_enter(&rw_lock_list_mutex);
  	
-@@ -162,19 +167,21 @@
+@@ -158,23 +280,17 @@
+ /*=========*/
+ 	rw_lock_t*	lock)	/* in: rw-lock */
+ {
+-#ifdef UNIV_DEBUG
  	ut_a(rw_lock_validate(lock));
- #endif /* UNIV_DEBUG */
- 	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+-#endif /* UNIV_DEBUG */
+-	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
 -	ut_a(rw_lock_get_waiters(lock) == 0);
-+	ut_a(rw_lock_get_s_waiters(lock) == 0);
-+	ut_a(rw_lock_get_x_waiters(lock) == 0);
-+	ut_a(rw_lock_get_wx_waiters(lock) == 0);
- 	ut_a(rw_lock_get_reader_count(lock) == 0);
+-	ut_a(rw_lock_get_reader_count(lock) == 0);
++	ut_a(lock->lock_word == X_LOCK_DECR);
  	
- 	lock->magic_n = 0;
- 
+-	lock->magic_n = 0;
+-
 +#ifndef HAVE_ATOMIC_BUILTINS
  	mutex_free(rw_lock_get_mutex(lock));
-+#endif
++#endif /* HAVE_ATOMIC_BUILTINS */
  
  	mutex_enter(&rw_lock_list_mutex);
--	os_event_free(lock->event);
--
+ 	os_event_free(lock->event);
+ 
 -#ifdef __WIN__
-+	os_event_free(lock->s_event);
-+	os_event_free(lock->x_event);
  	os_event_free(lock->wait_ex_event);
 -#endif
  
  	if (UT_LIST_GET_PREV(list, lock)) {
  		ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
-@@ -192,26 +199,43 @@
- Checks that the rw-lock has been initialized and that there are no
- simultaneous shared and exclusive locks. */
+@@ -186,6 +302,8 @@
+ 	UT_LIST_REMOVE(list, rw_lock_list, lock);
  
-+/* MEMO: If HAVE_ATOMIC_BUILTINS, we should use this function statically. */
+ 	mutex_exit(&rw_lock_list_mutex);
 +
- ibool
- rw_lock_validate(
- /*=============*/
- 	rw_lock_t*	lock)
++	lock->magic_n = 0;
+ }
+ 
+ /**********************************************************************
+@@ -199,19 +317,12 @@
  {
-+	ulint	test;
  	ut_a(lock);
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(rw_lock_get_mutex(lock));
-+#endif
+-	mutex_enter(rw_lock_get_mutex(lock));
++	ulint waiters = rw_lock_get_waiters(lock);
++	lint lock_word = lock->lock_word;
  
  	ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	ut_a((rw_lock_get_reader_count(lock) == 0)
- 	     || (rw_lock_get_writer(lock) != RW_LOCK_EX));
+-	ut_a((rw_lock_get_reader_count(lock) == 0)
+-	     || (rw_lock_get_writer(lock) != RW_LOCK_EX));
 -	ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
 -	     || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
 -	     || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
 -	ut_a((rw_lock_get_waiters(lock) == 0)
 -	     || (rw_lock_get_waiters(lock) == 1));
-+#endif
-+	test = rw_lock_get_writer(lock);
-+	ut_a((test == RW_LOCK_EX)
-+	     || (test == RW_LOCK_WAIT_EX)
-+	     || (test == RW_LOCK_NOT_LOCKED));
-+	test = rw_lock_get_s_waiters(lock);
-+	ut_a((test == 0)
-+	     || (test == 1));
-+	test = rw_lock_get_x_waiters(lock);
-+	ut_a((test == 0)
-+	     || (test == 1));
-+	test = rw_lock_get_wx_waiters(lock);
-+	ut_a((test == 0)
-+	     || (test == 1));
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
- 	     
- 	mutex_exit(rw_lock_get_mutex(lock));
-+#endif
+-	ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
+-	     
+-	mutex_exit(rw_lock_get_mutex(lock));
++	ut_a(waiters == 0 || waiters == 1);
++	ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
  
  	return(TRUE);
  }
-@@ -237,13 +261,14 @@
+@@ -232,18 +343,15 @@
+ 	ulint		line)	/* in: line where requested */
+ {
+         ulint    index;	/* index of the reserved wait cell */
+-        ulint    i;   	/* spin round count */
++	ulint	 i = 0;	/* spin round count */
+         
          ut_ad(rw_lock_validate(lock));
  
++	rw_s_spin_wait_count++;	/* Count calls to this function */
  lock_loop:
-+        i = 0;
-+spin_loop:
- 	rw_s_spin_wait_count++;
+-	rw_s_spin_wait_count++;
  
  	/* Spin waiting for the writer field to become free */
 -        i = 0;
- 
+-
 -        while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
 -						&& i < SYNC_SPIN_ROUNDS) {
-+        while (i < SYNC_SPIN_ROUNDS
-+			&& rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
++	while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
          	if (srv_spin_wait_delay) {
          		ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
          	}
-@@ -262,15 +287,27 @@
+@@ -262,28 +370,32 @@
  		lock->cfile_name, (ulong) lock->cline, (ulong) i);
  	}
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(rw_lock_get_mutex(lock));
-+#endif
- 
+-	mutex_enter(rw_lock_get_mutex(lock));
+-
          /* We try once again to obtain the lock */
- 
+-
  	if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
+-		mutex_exit(rw_lock_get_mutex(lock));
++		rw_s_spin_round_count += i;
  
  		return; /* Success */
  	} else {
-+#ifdef HAVE_ATOMIC_BUILTINS
-+		/* like sync0sync.c doing */
-+		i++;
-+
+-		/* If we get here, locking did not succeed, we may
+-		suspend the thread to wait in the wait array */
+ 
+-		rw_s_system_call_count++;
 +		if (i < SYNC_SPIN_ROUNDS) {
-+			goto spin_loop;
++			goto lock_loop;
 +		}
-+#endif
- 		/* If we get here, locking did not succeed, we may
- 		suspend the thread to wait in the wait array */
++
++		rw_s_spin_round_count += i;
  
-@@ -281,9 +318,26 @@
+         	sync_array_reserve_cell(sync_primary_wait_array,
+ 				lock, RW_LOCK_SHARED,
  				file_name, line,
  				&index);
  
 -		rw_lock_set_waiters(lock, 1);
-+		rw_lock_set_s_waiters(lock, 1);
+-
+-		mutex_exit(rw_lock_get_mutex(lock));
++		/* Set waiters before checking lock_word to ensure wake-up
++                signal is sent. This may lead to some unnecessary signals. */
++		rw_lock_set_waiter_flag(lock);
 +
-+#ifdef HAVE_ATOMIC_BUILTINS
-+		/* like sync0sync.c doing */
-+		for (i = 0; i < 4; i++) {
-+			if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
-+				sync_array_free_cell(sync_primary_wait_array, index);
-+				return; /* Success */
-+			}
-+		}
- 
-+		/* If wait_ex_waiter stalls, wakes it. */
-+		if (lock->reader_count == 0
-+		    && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0)) {
-+			os_event_set(lock->wait_ex_event);
-+			sync_array_object_signalled(sync_primary_wait_array);
++		if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
++			sync_array_free_cell(sync_primary_wait_array, index);
++			return; /* Success */
 +		}
-+#else
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
  
  		if (srv_print_latch_waits) {
  			fprintf(stderr,
-@@ -318,13 +372,19 @@
- {
- 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+@@ -292,11 +404,13 @@
+ 		        lock, lock->cfile_name, (ulong) lock->cline);
+ 		}
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(&(lock->mutex));
-+#endif
+-		rw_s_system_call_count++;
++		/* these stats may not be accurate */
++		lock->count_os_wait++;
+ 		rw_s_os_wait_count++;
  
- 	lock->writer_thread = os_thread_get_curr_id();
+        	 	sync_array_wait_event(sync_primary_wait_array, index);
  
- 	lock->pass = 0;
++		i = 0;
+         	goto lock_loop;
+ 	}        
+ }
+@@ -318,114 +432,130 @@
+ {
+ 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_exit(&(lock->mutex));
-+#else
-+	__sync_synchronize();
-+#endif
+-	mutex_enter(&(lock->mutex));
+-
+-	lock->writer_thread = os_thread_get_curr_id();
+-
+-	lock->pass = 0;
+-
+-	mutex_exit(&(lock->mutex));
++	rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
  }
  
  /**********************************************************************
-@@ -342,6 +402,89 @@
+-Low-level function for acquiring an exclusive lock. */
++Function for the next writer to call. Waits for readers to exit.
++The caller must have already decremented lock_word by X_LOCK_DECR.*/
+ UNIV_INLINE
+-ulint
+-rw_lock_x_lock_low(
+-/*===============*/
+-				/* out: RW_LOCK_NOT_LOCKED if did
+-				not succeed, RW_LOCK_EX if success,
+-				RW_LOCK_WAIT_EX, if got wait reservation */
++void
++rw_lock_x_lock_wait(
++/*================*/
+         rw_lock_t*   	lock,  	/* in: pointer to rw-lock */
++#ifdef UNIV_SYNC_DEBUG
+ 	ulint		pass,	/* in: pass value; != 0, if the lock will
+ 				be passed to another thread to unlock */
++#endif
  	const char*	file_name,/* in: file name where lock requested */
  	ulint		line)	/* in: line where requested */
  {
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
-+retry_writer:
-+	/* try to lock writer */
-+	if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX)
-+			== RW_LOCK_NOT_LOCKED) {
-+		/* success */
-+		/* obtain RW_LOCK_WAIT_EX right */
-+		lock->writer_thread = curr_thread;
-+		lock->pass = pass;
-+		lock->writer_is_wait_ex = TRUE;
-+		/* atomic operation may be safer about memory order. */
-+		__sync_synchronize();
-+#ifdef UNIV_SYNC_DEBUG
-+		rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
-+					file_name, line);
-+#endif
-+	}
-+
-+	if (!os_thread_eq(lock->writer_thread, curr_thread)) {
-+		return(RW_LOCK_NOT_LOCKED);
-+	}
-+
-+	switch(rw_lock_get_writer(lock)) {
-+	    case RW_LOCK_WAIT_EX:
-+		/* have right to try x-lock */
-+retry_x_lock:
-+		/* try x-lock */
-+		if(__sync_sub_and_fetch(&(lock->lock_word),
-+				RW_LOCK_BIAS) == 0) {
-+			/* success */
-+			lock->pass = pass;
-+			lock->writer_is_wait_ex = FALSE;
-+			__sync_fetch_and_add(&(lock->writer_count),1);
-+
-+#ifdef UNIV_SYNC_DEBUG
-+			rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
-+			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
-+						file_name, line);
-+#endif
-+
-+			lock->last_x_file_name = file_name;
-+			lock->last_x_line = line;
+-#ifdef UNIV_SYNC_DEBUG
+-	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+-#endif /* UNIV_SYNC_DEBUG */
+-	if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
++	ulint index;
++	ulint i = 0;
+ 
+-		if (rw_lock_get_reader_count(lock) == 0) {
++	ut_ad(lock->lock_word <= 0);
 +
-+			/* Locking succeeded, we may return */
-+			return(RW_LOCK_EX);
-+		} else if(__sync_fetch_and_add(&(lock->lock_word),
-+				RW_LOCK_BIAS) == 0) {
-+			/* retry x-lock */
-+			goto retry_x_lock;
++	while (lock->lock_word < 0) {
++		if (srv_spin_wait_delay) {
++			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
 +		}
-+
-+		/* There are readers, we have to wait */
-+		return(RW_LOCK_WAIT_EX);
-+
-+		break;
-+
-+	    case RW_LOCK_EX:
-+		/* already have x-lock */
-+		if ((lock->pass == 0)&&(pass == 0)) {
-+			__sync_fetch_and_add(&(lock->writer_count),1);
-+
-+#ifdef UNIV_SYNC_DEBUG
-+			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
-+						line);
-+#endif
-+
-+			lock->last_x_file_name = file_name;
-+			lock->last_x_line = line;
-+
-+			/* Locking succeeded, we may return */
-+			return(RW_LOCK_EX);
++		if(i < SYNC_SPIN_ROUNDS) {
++			i++;
++			continue;
 +		}
+ 			
+-			rw_lock_set_writer(lock, RW_LOCK_EX);
+-			lock->writer_thread = os_thread_get_curr_id();
+-			lock->writer_count++;
+-			lock->pass = pass;
++		/* If there is still a reader, then go to sleep.*/
++		rw_x_spin_round_count += i;
++		i = 0;
++		sync_array_reserve_cell(sync_primary_wait_array,
++					lock,
++					RW_LOCK_WAIT_EX,
++					file_name, line,
++					&index);
++		/* Check lock_word to ensure wake-up isn't missed.*/
++		if(lock->lock_word < 0) {
+ 			
++			/* these stats may not be accurate */
++			lock->count_os_wait++;
++			rw_x_os_wait_count++;
 +
-+		return(RW_LOCK_NOT_LOCKED);
-+
-+		break;
-+
-+	    default: /* RW_LOCK_NOT_LOCKED? maybe impossible */
-+		goto retry_writer;
++                        /* Add debug info as it is needed to detect possible
++                        deadlock. We must add info for WAIT_EX thread for
++                        deadlock detection to work properly. */
+ #ifdef UNIV_SYNC_DEBUG
+-			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++			rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+ 							file_name, line);
+ #endif
+-			lock->last_x_file_name = file_name;
+-			lock->last_x_line = line;
+-		
+-			/* Locking succeeded, we may return */
+-			return(RW_LOCK_EX);
+-		} else {
+-			/* There are readers, we have to wait */
+-			rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
+-			lock->writer_thread = os_thread_get_curr_id();
+-			lock->pass = pass;
+-			lock->writer_is_wait_ex = TRUE;
+ 
++			sync_array_wait_event(sync_primary_wait_array,
++					      index);
+ #ifdef UNIV_SYNC_DEBUG
+-			rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+-							file_name, line);
++			rw_lock_remove_debug_info(lock, pass,
++					       RW_LOCK_WAIT_EX);
+ #endif
+-
+-			return(RW_LOCK_WAIT_EX);
++                        /* It is possible to wake when lock_word < 0.
++                        We must pass the while-loop check to proceed.*/
++		} else {
++			sync_array_free_cell(sync_primary_wait_array,
++					     index);
+ 		}
 +	}
-+#else /* HAVE_ATOMIC_BUILTINS */
++	rw_x_spin_round_count += i;
++}
+ 
+-	} else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
+-		   && os_thread_eq(lock->writer_thread,
+-						os_thread_get_curr_id())) {
++/**********************************************************************
++Low-level function for acquiring an exclusive lock. */
++UNIV_INLINE
++ibool
++rw_lock_x_lock_low(
++/*===============*/
++				/* out: RW_LOCK_NOT_LOCKED if did
++				not succeed, RW_LOCK_EX if success. */
++	rw_lock_t*	lock,	/* in: pointer to rw-lock */
++	ulint		pass,	/* in: pass value; != 0, if the lock will
++				be passed to another thread to unlock */
++	const char*	file_name,/* in: file name where lock requested */
++	ulint		line)	/* in: line where requested */
++{
++	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
+ 
+-		if (rw_lock_get_reader_count(lock) == 0) {
++	if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+ 
+-			rw_lock_set_writer(lock, RW_LOCK_EX);
+-			lock->writer_count++;
+-			lock->pass = pass;
+-			lock->writer_is_wait_ex = FALSE;
++		/* lock->recursive also tells us if the writer_thread
++		field is stale or active. As we are going to write
++		our own thread id in that field it must be that the
++		current writer_thread value is not active. */
++		ut_a(!lock->recursive);
+ 
++		/* Decrement occurred: we are writer or next-writer. */
++		rw_lock_set_writer_id_and_recursion_flag(lock,
++						pass ? FALSE : TRUE);
 +
++		rw_lock_x_lock_wait(lock,
  #ifdef UNIV_SYNC_DEBUG
- 	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
- #endif /* UNIV_SYNC_DEBUG */
-@@ -423,6 +566,7 @@
- 		/* Locking succeeded, we may return */
- 		return(RW_LOCK_EX);
- 	}
-+#endif /* HAVE_ATOMIC_BUILTINS */
+-			rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
+-			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+-							file_name, line);
++				    pass,
+ #endif
++                                    file_name, line);
+ 		
+-			lock->last_x_file_name = file_name;
+-			lock->last_x_line = line;
+-
+-			/* Locking succeeded, we may return */
+-			return(RW_LOCK_EX);
++	} else {
++		/* Decrement failed: relock or failed lock */
++		if (!pass && lock->recursive
++		    && os_thread_eq(lock->writer_thread, curr_thread)) {
++			/* Relock */
++                        lock->lock_word -= X_LOCK_DECR;
++		} else {
++			/* Another thread locked before us */
++			return(FALSE);
+ 		}
+-
+-		return(RW_LOCK_WAIT_EX);
+-
+-	} else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
+-		   && os_thread_eq(lock->writer_thread,
+-						os_thread_get_curr_id())
+-		   && (lock->pass == 0)
+-		   && (pass == 0)) {
+-
+-		lock->writer_count++;
+-
++	}
+ #ifdef UNIV_SYNC_DEBUG
+-		rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
+-									line);
++	rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
++			       file_name, line);
+ #endif
++	lock->last_x_file_name = file_name;
++	lock->last_x_line = (unsigned int) line;
+ 		
+-		lock->last_x_file_name = file_name;
+-		lock->last_x_line = line;
+-
+-		/* Locking succeeded, we may return */
+-		return(RW_LOCK_EX);
+-	}
+-
+-	/* Locking did not succeed */
+-	return(RW_LOCK_NOT_LOCKED);
++	return(TRUE);
+ }
  
- 	/* Locking did not succeed */
- 	return(RW_LOCK_NOT_LOCKED);
-@@ -448,19 +592,33 @@
+ /**********************************************************************
+@@ -448,47 +578,30 @@
  	ulint		line)	/* in: line where requested */
  {
          ulint	index;  /* index of the reserved wait cell */
 -        ulint	state;	/* lock state acquired */
-+        ulint	state = RW_LOCK_NOT_LOCKED;	/* lock state acquired */
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	ulint	prev_state = RW_LOCK_NOT_LOCKED;
-+#endif
          ulint	i;	/* spin round count */
++	ibool   spinning = FALSE;
          
          ut_ad(rw_lock_validate(lock));
  
- lock_loop:
+-lock_loop:
+-        /* Acquire the mutex protecting the rw-lock fields */
+-	mutex_enter_fast(&(lock->mutex));
+-
+-	state = rw_lock_x_lock_low(lock, pass, file_name, line);
 +	i = 0;
-+
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	prev_state = state;
-+#else
-         /* Acquire the mutex protecting the rw-lock fields */
- 	mutex_enter_fast(&(lock->mutex));
-+#endif
- 
- 	state = rw_lock_x_lock_low(lock, pass, file_name, line);
  		
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (state != prev_state) i=0; /* if progress, reset counter. */
-+#else
- 	mutex_exit(&(lock->mutex));
-+#endif
+-	mutex_exit(&(lock->mutex));
++lock_loop:
          
-+spin_loop:
- 	if (state == RW_LOCK_EX) {
+-	if (state == RW_LOCK_EX) {
++	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++		rw_x_spin_round_count += i;
  
  		return;	/* Locking succeeded */
-@@ -468,10 +626,9 @@
- 	} else if (state == RW_LOCK_NOT_LOCKED) {
  
-  		/* Spin waiting for the writer field to become free */
+-	} else if (state == RW_LOCK_NOT_LOCKED) {
+-
+- 		/* Spin waiting for the writer field to become free */
 -		i = 0;
- 
+-
 -        	while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED 
 -               					&& i < SYNC_SPIN_ROUNDS) {
-+        	while (i < SYNC_SPIN_ROUNDS
-+			&& lock->lock_word != RW_LOCK_BIAS) {
-         		if (srv_spin_wait_delay) {
- 				ut_delay(ut_rnd_interval(0,
- 							srv_spin_wait_delay));
-@@ -485,9 +642,12 @@
-         } else if (state == RW_LOCK_WAIT_EX) {
+-        		if (srv_spin_wait_delay) {
+-				ut_delay(ut_rnd_interval(0,
+-							srv_spin_wait_delay));
+-        		}
++	} else {
+         		
+-        		i++;
+-        	}
+-		if (i == SYNC_SPIN_ROUNDS) {
+-			os_thread_yield();
++                if (!spinning) {
++                        spinning = TRUE;
++                        rw_x_spin_wait_count++;
+ 		}
+-        } else if (state == RW_LOCK_WAIT_EX) {
  
-  		/* Spin waiting for the reader count field to become zero */
+- 		/* Spin waiting for the reader count field to become zero */
 -		i = 0;
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+		while (lock->lock_word != RW_LOCK_BIAS
-+#else
-         	while (rw_lock_get_reader_count(lock) != 0 
-+#endif
-                					&& i < SYNC_SPIN_ROUNDS) {
+-
+-        	while (rw_lock_get_reader_count(lock) != 0 
+-               					&& i < SYNC_SPIN_ROUNDS) {
++		/* Spin waiting for the lock_word to become free */
++		while (i < SYNC_SPIN_ROUNDS
++		       && lock->lock_word <= 0) {
          		if (srv_spin_wait_delay) {
  				ut_delay(ut_rnd_interval(0,
-@@ -500,7 +660,6 @@
+ 							srv_spin_wait_delay));
+@@ -498,12 +611,13 @@
+         	}
+ 		if (i == SYNC_SPIN_ROUNDS) {
  			os_thread_yield();
++		} else {
++			goto lock_loop;
  		}
-         } else {
+-        } else {
 -		i = 0; /* Eliminate a compiler warning */
- 		ut_error;
+-		ut_error;
  	}	
  
-@@ -516,34 +675,69 @@
-         /* We try once again to obtain the lock. Acquire the mutex protecting
- 	the rw-lock fields */
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	prev_state = state;
-+#else
- 	mutex_enter(rw_lock_get_mutex(lock));
-+#endif
- 
- 	state = rw_lock_x_lock_low(lock, pass, file_name, line);
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	if (state != prev_state) i=0; /* if progress, reset counter. */
-+#endif
++	rw_x_spin_round_count += i;
 +
- 	if (state == RW_LOCK_EX) {
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
- 
- 		return;	/* Locking succeeded */
+ 	if (srv_print_latch_waits) {
+ 		fprintf(stderr,
+ 	"Thread %lu spin wait rw-x-lock at %p cfile %s cline %lu rnds %lu\n",
+@@ -511,39 +625,20 @@
+ 		lock->cfile_name, (ulong) lock->cline, (ulong) i);
  	}
  
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	/* like sync0sync.c doing */
-+	i++;
-+
-+	if (i < SYNC_SPIN_ROUNDS) {
-+		goto spin_loop;
-+	}
-+#endif
-+
- 	rw_x_system_call_count++;
- 
+-	rw_x_spin_wait_count++;
+-
+-        /* We try once again to obtain the lock. Acquire the mutex protecting
+-	the rw-lock fields */
+-
+-	mutex_enter(rw_lock_get_mutex(lock));
+-
+-	state = rw_lock_x_lock_low(lock, pass, file_name, line);
+-
+-	if (state == RW_LOCK_EX) {
+-		mutex_exit(rw_lock_get_mutex(lock));
+-
+-		return;	/* Locking succeeded */
+-	}
+-
+-	rw_x_system_call_count++;
+-
          sync_array_reserve_cell(sync_primary_wait_array,
  				lock,
 -#ifdef __WIN__
 -				/* On windows RW_LOCK_WAIT_EX signifies
 -				that this thread should wait on the
 -				special wait_ex_event. */
- 				(state == RW_LOCK_WAIT_EX)
- 				 ? RW_LOCK_WAIT_EX :
+-				(state == RW_LOCK_WAIT_EX)
+-				 ? RW_LOCK_WAIT_EX :
 -#endif
  				RW_LOCK_EX,
  				file_name, line,
  				&index);
  
 -	rw_lock_set_waiters(lock, 1);
-+	if (state == RW_LOCK_WAIT_EX) {
-+		rw_lock_set_wx_waiters(lock, 1);
-+	} else {
-+		rw_lock_set_x_waiters(lock, 1);
-+	}
- 
-+#ifdef HAVE_ATOMIC_BUILTINS
-+	/* like sync0sync.c doing */
-+	for (i = 0; i < 4; i++) {
-+		prev_state = state;
-+		state = rw_lock_x_lock_low(lock, pass, file_name, line);
-+		if (state == RW_LOCK_EX) {
-+			sync_array_free_cell(sync_primary_wait_array, index);
-+			return; /* Locking succeeded */
-+		}
-+		if (state != prev_state) {
-+			/* retry! */
-+			sync_array_free_cell(sync_primary_wait_array, index);
-+			goto lock_loop;
-+		}
+-
+-	mutex_exit(rw_lock_get_mutex(lock));
++	/* Waiters must be set before checking lock_word, to ensure signal
++	is sent. This could lead to a few unnecessary wake-up signals. */
++	rw_lock_set_waiter_flag(lock);
++
++	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
++		sync_array_free_cell(sync_primary_wait_array, index);
++		return; /* Locking succeeded */
 +	}
-+#else
- 	mutex_exit(rw_lock_get_mutex(lock));
-+#endif
  
  	if (srv_print_latch_waits) {
  		fprintf(stderr,
-@@ -718,7 +912,9 @@
+@@ -552,11 +647,13 @@
+ 		lock->cfile_name, (ulong) lock->cline);
+ 	}
+ 
+-	rw_x_system_call_count++;
++	/* these stats may not be accurate */
++	lock->count_os_wait++;
+ 	rw_x_os_wait_count++;
+ 
+         sync_array_wait_event(sync_primary_wait_array, index);
+ 
++	i = 0;
+         goto lock_loop;
+ }
+ 
+@@ -697,7 +794,9 @@
+ 	rw_lock_t*	lock,	/* in: rw-lock */
+ 	ulint		level)	/* in: level */
+ {
++#ifdef UNIV_SYNC_DEBUG
+ 	lock->level = level;
++#endif /* UNIV_SYNC_DEBUG */
+ }
+ 
+ #ifdef UNIV_SYNC_DEBUG
+@@ -718,7 +817,7 @@
  	ut_ad(lock);
  	ut_ad(rw_lock_validate(lock));
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(&(lock->mutex));
-+#endif
+-	mutex_enter(&(lock->mutex));
++	rw_lock_debug_mutex_enter();
  
  	info = UT_LIST_GET_FIRST(lock->debug_list);
  
-@@ -728,7 +924,9 @@
+@@ -728,7 +827,7 @@
  		    && (info->pass == 0)
  		    && (info->lock_type == lock_type)) {
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 			mutex_exit(&(lock->mutex));
-+#endif
+-			mutex_exit(&(lock->mutex));
++			rw_lock_debug_mutex_exit();
  		    	/* Found! */
  
  		    	return(TRUE);
-@@ -736,7 +934,9 @@
+@@ -736,7 +835,7 @@
  
  		info = UT_LIST_GET_NEXT(list, info);
  	}
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_exit(&(lock->mutex));
-+#endif
+-	mutex_exit(&(lock->mutex));
++	rw_lock_debug_mutex_exit();
  
  	return(FALSE);
  }
-@@ -758,21 +958,25 @@
+@@ -758,22 +857,18 @@
  	ut_ad(lock);
  	ut_ad(rw_lock_validate(lock));
  	
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_enter(&(lock->mutex));
-+#endif
- 
+-	mutex_enter(&(lock->mutex));
+-
  	if (lock_type == RW_LOCK_SHARED) {
- 		if (lock->reader_count > 0) {
+-		if (lock->reader_count > 0) {
++		if (rw_lock_get_reader_count(lock) > 0) {
  			ret = TRUE;
  		}
  	} else if (lock_type == RW_LOCK_EX) {
@@ -1258,100 +2237,244 @@ diff -ruN a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
  		ut_error;
  	}
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 	mutex_exit(&(lock->mutex));
-+#endif
- 
+-	mutex_exit(&(lock->mutex));
+-
  	return(ret);
  }
-@@ -801,16 +1005,26 @@
+ 
+@@ -801,11 +896,10 @@
  
  		count++;
  
 +#ifndef HAVE_ATOMIC_BUILTINS
  		mutex_enter(&(lock->mutex));
-+#endif
- 
- 		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- 		    || (rw_lock_get_reader_count(lock) != 0)
+-
+-		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+-		    || (rw_lock_get_reader_count(lock) != 0)
 -		    || (rw_lock_get_waiters(lock) != 0)) {
-+		    || (rw_lock_get_s_waiters(lock) != 0)
-+		    || (rw_lock_get_x_waiters(lock) != 0)
-+		    || (rw_lock_get_wx_waiters(lock) != 0)) {
++#endif
++		if (lock->lock_word != X_LOCK_DECR) {
  
  			fprintf(stderr, "RW-LOCK: %p ", lock);
  
--			if (rw_lock_get_waiters(lock)) {
--				fputs(" Waiters for the lock exist\n", stderr);
-+			if (rw_lock_get_s_waiters(lock)) {
-+				fputs(" s_waiters for the lock exist,", stderr);
-+			}
-+			if (rw_lock_get_x_waiters(lock)) {
-+				fputs(" x_waiters for the lock exist\n", stderr);
-+			}
-+			if (rw_lock_get_wx_waiters(lock)) {
-+				fputs(" wait_ex_waiters for the lock exist\n", stderr);
- 			} else {
- 				putc('\n', stderr);
- 			}
-@@ -822,7 +1036,9 @@
+@@ -821,8 +915,10 @@
+ 				info = UT_LIST_GET_NEXT(list, info);
  			}
  		}
- 
+-
 +#ifndef HAVE_ATOMIC_BUILTINS
  		mutex_exit(&(lock->mutex));
 +#endif
++
  		lock = UT_LIST_GET_NEXT(list, lock);
  	}
  
-@@ -847,10 +1063,18 @@
+@@ -845,9 +941,10 @@
+ 		"RW-LATCH INFO\n"
+ 		"RW-LATCH: %p ", lock);
  
- 	if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- 	    || (rw_lock_get_reader_count(lock) != 0)
+-	if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+-	    || (rw_lock_get_reader_count(lock) != 0)
 -	    || (rw_lock_get_waiters(lock) != 0)) {
-+	    || (rw_lock_get_s_waiters(lock) != 0)
-+	    || (rw_lock_get_x_waiters(lock) != 0)
-+	    || (rw_lock_get_wx_waiters(lock) != 0)) {
- 
--		if (rw_lock_get_waiters(lock)) {
--			fputs(" Waiters for the lock exist\n", stderr);
-+		if (rw_lock_get_s_waiters(lock)) {
-+			fputs(" s_waiters for the lock exist,", stderr);
-+		}
-+		if (rw_lock_get_x_waiters(lock)) {
-+			fputs(" x_waiters for the lock exist\n", stderr);
-+		}
-+		if (rw_lock_get_wx_waiters(lock)) {
-+			fputs(" wait_ex_waiters for the lock exist\n", stderr);
- 		} else {
- 			putc('\n', stderr);
++#ifndef HAVE_ATOMIC_BUILTINS
++	mutex_enter(&(lock->mutex));
++#endif
++	if (lock->lock_word != X_LOCK_DECR) {
+ 
+ 		if (rw_lock_get_waiters(lock)) {
+ 			fputs(" Waiters for the lock exist\n", stderr);
+@@ -861,6 +958,9 @@
+ 			info = UT_LIST_GET_NEXT(list, info);
  		}
-@@ -909,14 +1133,18 @@
+ 	}
++#ifndef HAVE_ATOMIC_BUILTINS
++	mutex_exit(&(lock->mutex));
++#endif
+ }
+ 
+ /*************************************************************************
+@@ -909,14 +1009,11 @@
  	lock = UT_LIST_GET_FIRST(rw_lock_list);
  
  	while (lock != NULL) {
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_enter(rw_lock_get_mutex(lock));
-+#endif
+-		mutex_enter(rw_lock_get_mutex(lock));
  
- 		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
- 				|| (rw_lock_get_reader_count(lock) != 0)) {
+-		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
+-				|| (rw_lock_get_reader_count(lock) != 0)) {
++		if (lock->lock_word != X_LOCK_DECR) {
  			count++;
  		}
  
-+#ifndef HAVE_ATOMIC_BUILTINS
- 		mutex_exit(rw_lock_get_mutex(lock));
-+#endif
+-		mutex_exit(rw_lock_get_mutex(lock));
  		lock = UT_LIST_GET_NEXT(list, lock);
  	}
  
+diff -ruN a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
+--- a/innobase/sync/sync0sync.c	2009-10-22 15:15:05.000000000 +0900
++++ b/innobase/sync/sync0sync.c	2009-10-22 15:18:44.000000000 +0900
+@@ -1,8 +1,31 @@
++/*****************************************************************************
++
++Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
++Copyright (c) 2008, Google Inc.
++
++Portions of this file contain modifications contributed and copyrighted by
++Google, Inc. Those modifications are gratefully acknowledged and are described
++briefly in the InnoDB documentation. The contributions by Google are
++incorporated with their permission, and subject to the conditions contained in
++the file COPYING.Google.
++
++This program is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free Software
++Foundation; version 2 of the License.
++
++This program is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
++FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License along with
++this program; if not, write to the Free Software Foundation, Inc., 59 Temple
++Place, Suite 330, Boston, MA 02111-1307 USA
++
++*****************************************************************************/
++
+ /******************************************************
+ Mutex, the basic synchronization primitive
+ 
+-(c) 1995 Innobase Oy
+-
+ Created 9/5/1995 Heikki Tuuri
+ *******************************************************/
+ 
+@@ -140,17 +163,12 @@
+ 
+ ulint	sync_dummy			= 0;
+ 
+-/* The number of system calls made in this module. Intended for performance
+-monitoring. */
+-
+-ulint	mutex_system_call_count		= 0;
+-
+ /* Number of spin waits on mutexes: for performance monitoring */
+ 
+-ulint	mutex_spin_round_count		= 0;
+-ulint	mutex_spin_wait_count		= 0;
+-ulint	mutex_os_wait_count		= 0;
+-ulint	mutex_exit_count		= 0;
++ib_longlong	mutex_spin_round_count		= 0;
++ib_longlong	mutex_spin_wait_count		= 0;
++ib_longlong	mutex_os_wait_count		= 0;
++ib_longlong	mutex_exit_count		= 0;
+ 
+ /* The global array of wait cells for implementation of the database's own
+ mutexes and read-write locks */
+@@ -240,6 +258,8 @@
+ {
+ #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+ 	mutex_reset_lock_word(mutex);
++#elif defined(HAVE_ATOMIC_BUILTINS)
++	mutex_reset_lock_word(mutex);
+ #else	
+ 	os_fast_mutex_init(&(mutex->os_fast_mutex));
+ 	mutex->lock_word = 0;
+@@ -325,7 +345,9 @@
+ 
+ 	os_event_free(mutex->event);
+ 
+-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER) 
++#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
++#elif defined(HAVE_ATOMIC_BUILTINS)
++#else
+ 	os_fast_mutex_free(&(mutex->os_fast_mutex));
+ #endif
+ 	/* If we free the mutex protecting the mutex list (freeing is
+@@ -421,6 +443,12 @@
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+   ut_ad(mutex);
+ 
++	/* This update is not thread safe, but we don't mind if the count
++	isn't exact. Moved out of ifdef that follows because we are willing
++	to sacrifice the cost of counting this as the data is valuable.
++	Count the number of calls to mutex_spin_wait. */
++	mutex_spin_wait_count++;
++
+ mutex_loop:
+ 
+   i = 0;
+@@ -433,7 +461,6 @@
+ 
+ spin_loop:
+ #if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+-  mutex_spin_wait_count++;
+   mutex->count_spin_loop++;
+ #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ 
+@@ -502,8 +529,6 @@
+   sync_array_reserve_cell(sync_primary_wait_array, mutex,
+                           SYNC_MUTEX, file_name, line, &index);
+ 
+-  mutex_system_call_count++;
+-
+   /* The memory order of the array reservation and the change in the
+   waiters field is important: when we suspend a thread, we first
+   reserve the cell and then set waiters field to 1. When threads are
+@@ -551,7 +576,6 @@
+             mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+ #endif
+ 
+-  mutex_system_call_count++;
+   mutex_os_wait_count++;
+ 
+ #ifndef UNIV_HOTBACKUP
+@@ -1368,20 +1392,31 @@
+ 	FILE*	file)		/* in: file where to print */
+ {
+ #ifdef UNIV_SYNC_DEBUG
+-	fprintf(stderr, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
++	fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
+ 		mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
+ #endif
+ 
+ 	fprintf(file,
+-"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
+-"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
+-			(ulong) mutex_spin_wait_count,
+-		        (ulong) mutex_spin_round_count,
+-			(ulong) mutex_os_wait_count,
+-			(ulong) rw_s_spin_wait_count,
+-		        (ulong) rw_s_os_wait_count,
+-			(ulong) rw_x_spin_wait_count,
+-		        (ulong) rw_x_os_wait_count);
++		"Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
++		"RW-shared spins %llu, OS waits %llu;"
++		" RW-excl spins %llu, OS waits %llu\n",
++		mutex_spin_wait_count,
++		mutex_spin_round_count,
++		mutex_os_wait_count,
++		rw_s_spin_wait_count,
++		rw_s_os_wait_count,
++		rw_x_spin_wait_count,
++		rw_x_os_wait_count);
++
++	fprintf(file,
++		"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
++		"%.2f RW-excl\n",
++		(double) mutex_spin_round_count /
++		(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
++		(double) rw_s_spin_round_count /
++		(rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
++		(double) rw_x_spin_round_count /
++		(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
+ }
+ 
+ /***********************************************************************
 diff -ruN a/patch_info/innodb_rw_lock.info b/patch_info/innodb_rw_lock.info
 --- /dev/null	1970-01-01 09:00:00.000000000 +0900
-+++ b/patch_info/innodb_rw_lock.info	2009-04-16 16:15:28.000000000 +0900
++++ b/patch_info/innodb_rw_lock.info	2009-10-22 15:18:30.000000000 +0900
 @@ -0,0 +1,6 @@
 +File=innodb_rw_lock.patch
-+Name=Fix of InnoDB rw_locks
++Name=Fix of InnoDB rw_locks ported from InnoDB Plugin
 +Version=1.0
-+Author=Yasufumi Kinoshita
-+License=BSD
++Author=InnoBase Oy.
++License=GPL
 +Comment=
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/percona-server.git/commitdiff/431f68fe79a66d5dfdd53f2655709e6c925fbc22



More information about the pld-cvs-commit mailing list