SOURCES (LINUX_2_6): kernel-reiser4.patch (NEW) - http://www.kernel.org/pub...

zbyniu zbyniu at pld-linux.org
Thu May 8 01:34:57 CEST 2008


Author: zbyniu                       Date: Wed May  7 23:34:57 2008 GMT
Module: SOURCES                       Tag: LINUX_2_6
---- Log message:
- http://www.kernel.org/pub/linux/kernel/people/edward/reiser4/reiser4-for-2.6/reiser4-for-2.6.25.patch.bz2

---- Files affected:
SOURCES:
   kernel-reiser4.patch (NONE -> 1.1.2.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/kernel-reiser4.patch
diff -u /dev/null SOURCES/kernel-reiser4.patch:1.1.2.1
--- /dev/null	Thu May  8 01:34:57 2008
+++ SOURCES/kernel-reiser4.patch	Thu May  8 01:34:51 2008
@@ -0,0 +1,78242 @@
+diff -urN linux-2.6.25.orig/Documentation/Changes linux-2.6.25/Documentation/Changes
+--- linux-2.6.25.orig/Documentation/Changes	2007-10-10 00:31:38.000000000 +0400
++++ linux-2.6.25/Documentation/Changes	2008-04-17 17:24:34.970177658 +0400
+@@ -36,6 +36,7 @@
+ o  e2fsprogs              1.29                    # tune2fs
+ o  jfsutils               1.1.3                   # fsck.jfs -V
+ o  reiserfsprogs          3.6.3                   # reiserfsck -V 2>&1|grep reiserfsprogs
++o  reiser4progs           1.0.0                   # fsck.reiser4 -V
+ o  xfsprogs               2.6.0                   # xfs_db -V
+ o  pcmciautils            004                     # pccardctl -V
+ o  quota-tools            3.09                    # quota -V
+@@ -145,6 +146,13 @@
+ versions of mkreiserfs, resize_reiserfs, debugreiserfs and
+ reiserfsck. These utils work on both i386 and alpha platforms.
+ 
++Reiser4progs
++------------
++
++The reiser4progs package contains utilities for the reiser4 file system.
++Detailed instructions are provided in the README file located at:
++<ftp://ftp.namesys.com/pub/reiser4progs/README>.
++
+ Xfsprogs
+ --------
+ 
+@@ -323,6 +331,10 @@
+ -------------
+ o  <http://www.namesys.com/pub/reiserfsprogs/reiserfsprogs-3.6.3.tar.gz>
+ 
++Reiser4progs
++------------
++o  <ftp://ftp.namesys.com/pub/reiser4progs/>
++
+ Xfsprogs
+ --------
+ o  <ftp://oss.sgi.com/projects/xfs/download/>
+diff -urN linux-2.6.25.orig/Documentation/filesystems/reiser4.txt linux-2.6.25/Documentation/filesystems/reiser4.txt
+--- linux-2.6.25.orig/Documentation/filesystems/reiser4.txt	1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.25/Documentation/filesystems/reiser4.txt	2008-04-17 17:24:34.970177658 +0400
+@@ -0,0 +1,75 @@
++Reiser4 filesystem
++==================
++Reiser4 is a file system based on dancing tree algorithms, and is
++described at http://www.namesys.com
++
++
++References
++==========
++web page		http://namesys.com/v4/v4.html
++source code		ftp://ftp.namesys.com/pub/reiser4-for-2.6/
++userland tools		ftp://ftp.namesys.com/pub/reiser4progs/
++install page		http://www.namesys.com/install_v4.html
++
++Compile options
++===============
++Enable reiser4 debug mode
++       This checks everything imaginable while reiser4
++       runs
++
++Mount options
++=============
++tmgr.atom_max_size=N
++	Atoms containing more than N blocks will be forced to commit.
++	N is decimal.
++	Default is nr_free_pagecache_pages() / 2 at mount time.
++
++tmgr.atom_max_age=N
++	Atoms older than N seconds will be forced to commit. N is decimal.
++	Default is 600.
++
++tmgr.atom_max_flushers=N
++	Limit of concurrent flushers for one atom. 0 means no limit.
++	Default is 0.
++
++tree.cbk_cache.nr_slots=N
++	Number of slots in the cbk cache.
++
++flush.relocate_threshold=N
++	If flush finds more than N adjacent dirty leaf-level blocks it
++	will force them to be relocated.
++	Default is 64.
++
++flush.relocate_distance=N
++	If flush can find a block allocation no farther than N from
++	the preceder it will relocate to that position.
++	Default is 64.
++
++flush.scan_maxnodes=N
++	The maximum number of nodes to scan left on a level during
++	flush.
++	Default is 10000.
++
++optimal_io_size=N
++	Preferred IO size. This value is used to set st_blksize of
++	struct stat.
++	Default is 65536.
++
++bsdgroups
++	Turn on BSD-style gid assignment.
++
++32bittimes
++	By default files in reiser4 have 64 bit timestamps. Files
++	created when filesystem is mounted with 32bittimes mount
++	option will get 32 bit timestamps.
++
++mtflush
++	Turn off concurrent flushing.
++
++nopseudo
++	Disable pseudo files support. See
++	http://namesys.com/v4/pseudo.html for more about pseudo files.
++
++dont_load_bitmap
++	Don't load all bitmap blocks at mount time, it is useful for
++	machines with tiny RAM and large disks.
+diff -urN linux-2.6.25.orig/fs/fs-writeback.c linux-2.6.25/fs/fs-writeback.c
+--- linux-2.6.25.orig/fs/fs-writeback.c	2008-04-19 00:15:52.280886935 +0400
++++ linux-2.6.25/fs/fs-writeback.c	2008-04-17 17:24:34.970177658 +0400
+@@ -385,8 +385,6 @@
+  * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
+  * that it can be located for waiting on in __writeback_single_inode().
+  *
+- * Called under inode_lock.
+- *
+  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
+  * This function assumes that the blockdev superblock's inodes are backed by
+  * a variety of queues, so all inodes are searched.  For other superblocks,
+@@ -402,11 +400,13 @@
+  * on the writer throttling path, and we get decent balancing between many
+  * throttled threads: we don't want them all piling up on inode_sync_wait.
+  */
+-static void
+-sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
++void
++generic_sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
+ {
+ 	const unsigned long start = jiffies;	/* livelock avoidance */
+ 
++	spin_lock(&inode_lock);
++
+ 	if (!wbc->for_kupdate || list_empty(&sb->s_io))
+ 		queue_io(sb, wbc->older_than_this);
+ 
+@@ -485,8 +485,19 @@
+ 		if (!list_empty(&sb->s_more_io))
+ 			wbc->more_io = 1;
+ 	}
++	spin_unlock(&inode_lock);
+ 	return;		/* Leave any unwritten inodes on s_io */
+ }
++EXPORT_SYMBOL(generic_sync_sb_inodes);
++
++static void
++sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
++{
++	if (sb->s_op->sync_inodes)
++		sb->s_op->sync_inodes(sb, wbc);
++	else
++		generic_sync_sb_inodes(sb, wbc);
++}
+ 
+ /*
+  * Start writeback of dirty pagecache data against all unlocked inodes.
+@@ -526,11 +537,8 @@
+ 			 * be unmounted by the time it is released.
+ 			 */
+ 			if (down_read_trylock(&sb->s_umount)) {
+-				if (sb->s_root) {
+-					spin_lock(&inode_lock);
++				if (sb->s_root)
+ 					sync_sb_inodes(sb, wbc);
+-					spin_unlock(&inode_lock);
+-				}
+ 				up_read(&sb->s_umount);
+ 			}
+ 			spin_lock(&sb_lock);
+@@ -568,9 +576,7 @@
+ 			(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
+ 			nr_dirty + nr_unstable;
+ 	wbc.nr_to_write += wbc.nr_to_write / 2;		/* Bit more for luck */
+-	spin_lock(&inode_lock);
+ 	sync_sb_inodes(sb, &wbc);
+-	spin_unlock(&inode_lock);
+ }
+ 
+ /*
+diff -urN linux-2.6.25.orig/fs/Kconfig linux-2.6.25/fs/Kconfig
+--- linux-2.6.25.orig/fs/Kconfig	2008-04-19 00:15:51.328640267 +0400
++++ linux-2.6.25/fs/Kconfig	2008-04-17 17:24:34.974178883 +0400
+@@ -274,6 +274,8 @@
+ 	default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
+ 	default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
+ 
++source "fs/reiser4/Kconfig"
++
+ config REISERFS_FS
+ 	tristate "Reiserfs support"
+ 	help
+diff -urN linux-2.6.25.orig/fs/Makefile linux-2.6.25/fs/Makefile
+--- linux-2.6.25.orig/fs/Makefile	2008-04-19 00:15:51.332641304 +0400
++++ linux-2.6.25/fs/Makefile	2008-04-17 17:24:34.978180109 +0400
+@@ -67,6 +67,7 @@
+  
+ # Do not add any filesystems before this line
+ obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
++obj-$(CONFIG_REISER4_FS)	+= reiser4/
+ obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
+ obj-$(CONFIG_EXT4DEV_FS)	+= ext4/ # Before ext2 so root fs can be ext4dev
+ obj-$(CONFIG_JBD)		+= jbd/
+diff -urN linux-2.6.25.orig/fs/reiser4/as_ops.c linux-2.6.25/fs/reiser4/as_ops.c
+--- linux-2.6.25.orig/fs/reiser4/as_ops.c	1970-01-01 03:00:00.000000000 +0300
++++ linux-2.6.25/fs/reiser4/as_ops.c	2008-04-17 17:24:34.978180109 +0400
+@@ -0,0 +1,377 @@
++/* Copyright 2003 by Hans Reiser, licensing governed by reiser4/README */
++
++/* Interface to VFS. Reiser4 address_space_operations are defined here. */
++
++#include "forward.h"
++#include "debug.h"
++#include "dformat.h"
++#include "coord.h"
++#include "plugin/item/item.h"
++#include "plugin/file/file.h"
++#include "plugin/security/perm.h"
++#include "plugin/disk_format/disk_format.h"
++#include "plugin/plugin.h"
++#include "plugin/plugin_set.h"
++#include "plugin/object.h"
++#include "txnmgr.h"
++#include "jnode.h"
++#include "znode.h"
++#include "block_alloc.h"
++#include "tree.h"
++#include "vfs_ops.h"
++#include "inode.h"
++#include "page_cache.h"
++#include "ktxnmgrd.h"
++#include "super.h"
++#include "reiser4.h"
++#include "entd.h"
++
++#include <linux/profile.h>
++#include <linux/types.h>
++#include <linux/mount.h>
++#include <linux/vfs.h>
++#include <linux/mm.h>
++#include <linux/buffer_head.h>
++#include <linux/dcache.h>
++#include <linux/list.h>
++#include <linux/pagemap.h>
++#include <linux/slab.h>
++#include <linux/seq_file.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/writeback.h>
++#include <linux/backing-dev.h>
++#include <linux/quotaops.h>
++#include <linux/security.h>
++
++/* address space operations */
++
++/**
++ * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting
++ * @page: page to be dirtied
++ *
++ * Operation of struct address_space_operations. This implementation is used by
++ * unix and cryptcompress file plugins.
++ *
++ * This is called when reiser4 page gets dirtied outside of reiser4, for
++ * example, when dirty bit is moved from pte to physical page.
++ *
++ * Tags page in the mapping's page tree with special tag so that it is possible
++ * to do all the reiser4 specific work wrt dirty pages (jnode creation,
++ * capturing by an atom) later because it can not be done in the contexts where
++ * set_page_dirty is called.
++ */
++int reiser4_set_page_dirty(struct page *page)
++{
++	/* this page can be unformatted only */
++	assert("vs-1734", (page->mapping &&
++			   page->mapping->host &&
++			   reiser4_get_super_fake(page->mapping->host->i_sb) !=
++			   page->mapping->host
++			   && reiser4_get_cc_fake(page->mapping->host->i_sb) !=
++			   page->mapping->host
++			   && reiser4_get_bitmap_fake(page->mapping->host->i_sb) !=
++			   page->mapping->host));
++
++	if (!TestSetPageDirty(page)) {
++		struct address_space *mapping = page->mapping;
++
++		if (mapping) {
++			write_lock_irq(&mapping->tree_lock);
++
++			/* check for race with truncate */
++			if (page->mapping) {
++				assert("vs-1652", page->mapping == mapping);
++				if (mapping_cap_account_dirty(mapping))
++					inc_zone_page_state(page,
++							NR_FILE_DIRTY);
++				radix_tree_tag_set(&mapping->page_tree,
++						   page->index,
++						   PAGECACHE_TAG_REISER4_MOVED);
++			}
++			write_unlock_irq(&mapping->tree_lock);
++			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
++		}
++	}
++	return 0;
++}
++
++/* ->invalidatepage method for reiser4 */
++
++/*
++ * this is called for each truncated page from
++ * truncate_inode_pages()->truncate_{complete,partial}_page().
++ *
++ * At the moment of call, page is under lock, and outstanding io (if any) has
++ * completed.
++ */
++
++/**
++ * reiser4_invalidatepage
++ * @page: page to invalidate
++ * @offset: starting offset for partial invalidation
++ *
++ */
++void reiser4_invalidatepage(struct page *page, unsigned long offset)
++{
++	int ret = 0;
++	reiser4_context *ctx;
++	struct inode *inode;
++	jnode *node;
++
++	/*
++	 * This is called to truncate file's page.
++	 *
++	 * Originally, reiser4 implemented truncate in a standard way
++	 * (vmtruncate() calls ->invalidatepage() on all truncated pages
++	 * first, then file system ->truncate() call-back is invoked).
++	 *
++	 * This led to a problem when ->invalidatepage() was called on a
++	 * page with jnode that was captured into atom in ASTAGE_PRE_COMMIT
++	 * process. That is, truncate was bypassing transactions. To avoid
++	 * this, try_capture_page_to_invalidate() call was added here.
++	 *
++	 * After many troubles with vmtruncate() based truncate (including
++	 * races with flush, tail conversion, etc.) it was re-written in the
++	 * top-to-bottom style: items are killed in reiser4_cut_tree_object()
++	 * and pages belonging to extent are invalidated in kill_hook_extent().
++	 * So probably now additional call to capture is not needed here.
++	 */
++
++	assert("nikita-3137", PageLocked(page));
++	assert("nikita-3138", !PageWriteback(page));
++	inode = page->mapping->host;
++
++	/*
++	 * ->invalidatepage() should only be called for the unformatted
++	 * jnodes. Destruction of all other types of jnodes is performed
++	 * separately. But, during some corner cases (like handling errors
++	 * during mount) it is simpler to let ->invalidatepage to be called on
++	 * them. Check for this, and do nothing.
++	 */
++	if (reiser4_get_super_fake(inode->i_sb) == inode)
++		return;
++	if (reiser4_get_cc_fake(inode->i_sb) == inode)
++		return;
++	if (reiser4_get_bitmap_fake(inode->i_sb) == inode)
++		return;
++	assert("vs-1426", PagePrivate(page));
++	assert("vs-1427",
++	       page->mapping == jnode_get_mapping(jnode_by_page(page)));
++	assert("", jprivate(page) != NULL);
++	assert("", ergo(inode_file_plugin(inode) !=
++			file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID),
++			offset == 0));
++
++	ctx = reiser4_init_context(inode->i_sb);
++	if (IS_ERR(ctx))
++		return;
++
++	node = jprivate(page);
++	spin_lock_jnode(node);
++	if (!(node->state & ((1 << JNODE_DIRTY) | (1<< JNODE_FLUSH_QUEUED) |
++			  (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) {
++		/* there is no need to capture */
++		jref(node);
++		JF_SET(node, JNODE_HEARD_BANSHEE);
++		page_clear_jnode(page, node);
++		reiser4_uncapture_jnode(node);
++		unhash_unformatted_jnode(node);
++		jput(node);
++		reiser4_exit_context(ctx);
++		return;
++	}
++	spin_unlock_jnode(node);
++
++	/* capture page being truncated. */
++	ret = try_capture_page_to_invalidate(page);
++	if (ret != 0)
++		warning("nikita-3141", "Cannot capture: %i", ret);
++
++	if (offset == 0) {
++		/* remove jnode from transaction and detach it from page. */
++		jref(node);
++		JF_SET(node, JNODE_HEARD_BANSHEE);
++		/* page cannot be detached from jnode concurrently, because it
++		 * is locked */
++		reiser4_uncapture_page(page);
++
++		/* this detaches page from jnode, so that jdelete will not try
++		 * to lock page which is already locked */
++		spin_lock_jnode(node);
++		page_clear_jnode(page, node);
++		spin_unlock_jnode(node);
++		unhash_unformatted_jnode(node);
++
++		jput(node);
++	}
++
++	reiser4_exit_context(ctx);
++}
++
++/* help function called from reiser4_releasepage(). It returns true if jnode
++ * can be detached from its page and page released. */
++int jnode_is_releasable(jnode * node /* node to check */ )
++{
++	assert("nikita-2781", node != NULL);
++	assert_spin_locked(&(node->guard));
++	assert_spin_locked(&(node->load));
++
++	/* if some thread is currently using jnode page, the latter cannot be
++	 * detached */
++	if (atomic_read(&node->d_count) != 0) {
++		return 0;
++	}
++
++	assert("vs-1214", !jnode_is_loaded(node));
++
++	/*
++	 * can only release page if real block number is assigned to it. Simple
++	 * check for ->atom wouldn't do, because it is possible for node to be
++	 * clean, not in atom yet, and still having fake block number. For
++	 * example, node just created in jinit_new().
++	 */
++	if (reiser4_blocknr_is_fake(jnode_get_block(node)))
++		return 0;
++
++	/*
++	 * pages prepared for write can not be released anyway, so avoid
++	 * detaching jnode from the page
++	 */
++	if (JF_ISSET(node, JNODE_WRITE_PREPARED))
++		return 0;
++
++	/*
++	 * dirty jnode cannot be released. It can however be submitted to disk
++	 * as part of early flushing, but only after getting flush-prepped.
++	 */
++	if (JF_ISSET(node, JNODE_DIRTY))
++		return 0;
++
++	/* overwrite set is only written by log writer. */
++	if (JF_ISSET(node, JNODE_OVRWR))
++		return 0;
++
++	/* jnode is already under writeback */
++	if (JF_ISSET(node, JNODE_WRITEBACK))
++		return 0;
++
++	/* don't flush bitmaps or journal records */
++	if (!jnode_is_znode(node) && !jnode_is_unformatted(node))
++		return 0;
++
++	return 1;
++}
++
++/*
++ * ->releasepage method for reiser4
++ *
++ * This is called by VM scanner when it comes across clean page.  What we have
++ * to do here is to check whether page can really be released (freed that is)
++ * and if so, detach jnode from it and remove page from the page cache.
++ *
++ * Check for releasability is done by jnode_is_releasable() function.
++ */
++int reiser4_releasepage(struct page *page, gfp_t gfp UNUSED_ARG)
++{
++	jnode *node;
++
++	assert("nikita-2257", PagePrivate(page));
++	assert("nikita-2259", PageLocked(page));
++	assert("nikita-2892", !PageWriteback(page));
++	assert("nikita-3019", reiser4_schedulable());
++
++	/* NOTE-NIKITA: this can be called in the context of reiser4 call. It
++	   is not clear what to do in this case. A lot of deadlocks seem to be
++	   possible. */
++
++	node = jnode_by_page(page);
++	assert("nikita-2258", node != NULL);
++	assert("reiser4-4", page->mapping != NULL);
++	assert("reiser4-5", page->mapping->host != NULL);
++
++	if (PageDirty(page))
++		return 0;
++
++	/* extra page reference is used by reiser4 to protect
++	 * jnode<->page link from this ->releasepage(). */
++	if (page_count(page) > 3)
++		return 0;
++
++	/* releasable() needs jnode lock, because it looks at the jnode fields
++	 * and we need jload_lock here to avoid races with jload(). */
++	spin_lock_jnode(node);
++	spin_lock(&(node->load));
++	if (jnode_is_releasable(node)) {
++		struct address_space *mapping;
++
++		mapping = page->mapping;
++		jref(node);
++		/* there is no need to synchronize against
++		 * jnode_extent_write() here, because pages seen by
++		 * jnode_extent_write() are !releasable(). */
++		page_clear_jnode(page, node);
++		spin_unlock(&(node->load));
++		spin_unlock_jnode(node);
++
++		/* we are under memory pressure so release jnode also. */
++		jput(node);
++
++		return 1;
++	} else {
++		spin_unlock(&(node->load));
++		spin_unlock_jnode(node);
++		assert("nikita-3020", reiser4_schedulable());
++		return 0;
++	}
++}
++
++int reiser4_readpage(struct file *file, struct page *page)
++{
++	assert("edward-1533", PageLocked(page));
++	assert("edward-1534", !PageUptodate(page));
++	assert("edward-1535", page->mapping && page->mapping->host);
++
++	return inode_file_plugin(page->mapping->host)->readpage(file, page);
++}
++
++int reiser4_readpages(struct file *file, struct address_space *mapping,
++		      struct list_head *pages, unsigned nr_pages)
++{
++	return inode_file_plugin(mapping->host)->readpages(file, mapping,
++							   pages, nr_pages);
++}
++
++int reiser4_writepages(struct address_space *mapping,
++		       struct writeback_control *wbc)
++{
++	return inode_file_plugin(mapping->host)->writepages(mapping, wbc);
++}
++
++int reiser4_prepare_write(struct file *file, struct page *page,
++			  unsigned from, unsigned to)
++{
++	return inode_file_plugin(file->f_dentry->d_inode)->prepare_write(file,
++									 page,
++									 from,
++									 to);
++}
++
++int reiser4_commit_write(struct file *file, struct page *page,
++			 unsigned from, unsigned to)
++{
++	return inode_file_plugin(file->f_dentry->d_inode)->commit_write(file,
++									page,
++									from,
++									to);
++}
++
++/* Make Linus happy.
++   Local variables:
++   c-indentation-style: "K&R"
++   mode-name: "LC"
++   c-basic-offset: 8
++   tab-width: 8
++   fill-column: 120
++   End:
++*/
+diff -urN linux-2.6.25.orig/fs/reiser4/block_alloc.c linux-2.6.25/fs/reiser4/block_alloc.c
<<Diff was trimmed, longer than 597 lines>>


More information about the pld-cvs-commit mailing list