SOURCES (LINUX_2_6_16): linux-dmcache.patch - http://www.acis.ufl.edu/~ming/dmcache/patch-2.6.19.1
glen
glen at pld-linux.org
Wed Nov 14 01:08:44 CET 2007
Author: glen Date: Wed Nov 14 00:08:44 2007 GMT
Module: SOURCES Tag: LINUX_2_6_16
---- Log message:
- http://www.acis.ufl.edu/~ming/dmcache/patch-2.6.19.1
---- Files affected:
SOURCES:
linux-dmcache.patch (1.1 -> 1.1.2.1)
---- Diffs:
================================================================
Index: SOURCES/linux-dmcache.patch
diff -u SOURCES/linux-dmcache.patch:1.1 SOURCES/linux-dmcache.patch:1.1.2.1
--- SOURCES/linux-dmcache.patch:1.1 Wed Nov 14 01:07:34 2007
+++ SOURCES/linux-dmcache.patch Wed Nov 14 01:08:39 2007
@@ -1,1797 +1,1786 @@
-diff -Naur linux-2.6.21.7-orig/drivers/md/dm-cache.c linux-2.6.21.7-dmcache/drivers/md/dm-cache.c
---- linux-2.6.21.7-orig/drivers/md/dm-cache.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.6.21.7-dmcache/drivers/md/dm-cache.c 2007-08-23 14:10:58.000000000 -0400
-@@ -0,0 +1,1766 @@
-+/****************************************************************************
-+ * dm-cache.c
-+ * Device mapper target for block-level disk caching
-+ *
-+ * Copyright (C) International Business Machines Corp., 2006
-+ * Author: Ming Zhao (mingzhao at ufl.edu)
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; under version 2 of the License.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ *
-+ ****************************************************************************/
-+
-+#include <asm/atomic.h>
-+#include <asm/checksum.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/list.h>
-+#include <linux/blkdev.h>
-+#include <linux/bio.h>
-+#include <linux/slab.h>
-+#include <linux/hash.h>
-+#include <linux/spinlock.h>
-+#include <linux/workqueue.h>
-+#include <linux/pagemap.h>
-+
-+#include "dm.h"
-+#include "dm-io.h"
-+#include "dm-bio-list.h"
-+#include "kcopyd.h"
-+
-+#define DMC_DEBUG 0
-+
-+#define DM_MSG_PREFIX "cache"
-+#define DMC_PREFIX "dm-cache: "
-+
-+#if DMC_DEBUG
-+#define DPRINTK( s, arg... ) printk(DMC_PREFIX s "\n", ##arg)
-+#else
-+#define DPRINTK( s, arg... )
-+#endif
-+
-+/* Default cache parameters */
-+#define DEFAULT_CACHE_SIZE 65536
-+#define DEFAULT_CACHE_ASSOC 1024
-+#define DEFAULT_BLOCK_SIZE 8
-+#define CONSECUTIVE_BLOCKS 512
-+
-+/* Write policy */
-+#define WRITE_THROUGH 0
-+#define WRITE_BACK 1
-+#define DEFAULT_WRITE_POLICY WRITE_THROUGH
-+
-+/* Number of pages for I/O */
-+#define DMCACHE_COPY_PAGES 1024
-+
-+/* States of a cache block */
-+#define INVALID 0
-+#define VALID 1 /* Valid */
-+#define RESERVED 2 /* Allocated but data not in place yet */
-+#define DIRTY 4 /* Locally modified */
-+#define WRITEBACK 8 /* In the process of write back */
-+
-+#define is_state(x, y) (x & y)
-+#define set_state(x, y) (x |= y)
-+#define clear_state(x, y) (x &= ~y)
-+
-+/*
-+ * Cache context
-+ */
-+struct cache_c {
-+ struct dm_dev *src_dev; /* Source device */
-+ struct dm_dev *cache_dev; /* Cache device */
-+ struct kcopyd_client *kcp_client; /* Kcopyd client for writing back data */
-+
-+ struct cacheblock *cache; /* Hash table for cache blocks */
-+ sector_t size; /* Cache size */
-+ unsigned int bits; /* Cache size in bits */
-+ unsigned int assoc; /* Cache associativity */
-+ unsigned int block_size; /* Cache block size */
-+ unsigned int block_shift; /* Cache block size in bits */
-+ unsigned int block_mask; /* Cache block mask */
-+ unsigned int consecutive_shift; /* Consecutive blocks size in bits */
-+ unsigned long counter; /* Logical timestamp of last access */
-+ unsigned int write_policy; /* Cache write policy */
-+ sector_t dirty_blocks; /* Number of dirty blocks */
-+
-+ spinlock_t lock; /* Lock to protect page allocation/deallocation */
-+ struct page_list *pages; /* Pages for I/O */
-+ unsigned int nr_pages; /* Number of pages */
-+ unsigned int nr_free_pages; /* Number of free pages */
-+ wait_queue_head_t destroyq; /* Wait queue for I/O completion */
-+ atomic_t nr_jobs; /* Number of I/O jobs */
-+ /* Stats */
-+ unsigned long reads; /* Number of reads */
-+ unsigned long writes; /* Number of writes */
-+ unsigned long cache_hits; /* Number of cache hits */
-+ unsigned long replace; /* Number of cache replacements */
-+ unsigned long writeback; /* Number of replaced dirty blocks */
-+ unsigned long dirty; /* Number of submitted dirty blocks */
-+};
-+
-+/* Cache block metadata structure */
-+struct cacheblock {
-+ spinlock_t lock; /* Lock to protect operations on the bio list */
-+ sector_t block; /* Sector number of the cached block */
-+ unsigned short state; /* State of a block */
-+ unsigned long counter; /* Logical timestamp of the block's last access */
-+ struct bio_list bios; /* List of pending bios */
-+};
-+
-+
-+/****************************************************************************
-+ * Functions and data structures for implementing a kcached to handle async
-+ * I/O. Code for page and queue handling is borrowed from kcopyd.c.
-+ ****************************************************************************/
-+
-+/*
-+ * Functions for handling pages used by async I/O.
-+ * The data asked by a bio request may not be aligned with cache blocks, in
-+ * which case additional pages are required for the request that is forwarded
-+ * to the server. A pool of pages are reserved for this purpose.
-+ */
-+
-+static struct page_list *alloc_pl(void)
-+{
-+ struct page_list *pl;
-+
-+ pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-+ if (!pl)
-+ return NULL;
-+
-+ pl->page = alloc_page(GFP_KERNEL);
-+ if (!pl->page) {
-+ kfree(pl);
-+ return NULL;
-+ }
-+
-+ return pl;
-+}
-+
-+static void free_pl(struct page_list *pl)
-+{
-+ __free_page(pl->page);
-+ kfree(pl);
-+}
-+
-+static void drop_pages(struct page_list *pl)
-+{
-+ struct page_list *next;
-+
-+ while (pl) {
-+ next = pl->next;
-+ free_pl(pl);
-+ pl = next;
-+ }
-+}
-+
-+static int kcached_get_pages(struct cache_c *dmc, unsigned int nr,
-+ struct page_list **pages)
-+{
-+ struct page_list *pl;
-+
-+ spin_lock(&dmc->lock);
-+ if (dmc->nr_free_pages < nr) {
-+ DPRINTK("kcached_get_pages: No free pages: %u<%u",
-+ dmc->nr_free_pages, nr);
-+ spin_unlock(&dmc->lock);
-+ return -ENOMEM;
-+ }
-+
-+ dmc->nr_free_pages -= nr;
-+ for (*pages = pl = dmc->pages; --nr; pl = pl->next)
-+ ;
-+
-+ dmc->pages = pl->next;
-+ pl->next = NULL;
-+
-+ spin_unlock(&dmc->lock);
-+
-+ return 0;
-+}
-+
-+static void kcached_put_pages(struct cache_c *dmc, struct page_list *pl)
-+{
-+ struct page_list *cursor;
-+
-+ spin_lock(&dmc->lock);
-+ for (cursor = pl; cursor->next; cursor = cursor->next)
-+ dmc->nr_free_pages++;
-+
-+ dmc->nr_free_pages++;
-+ cursor->next = dmc->pages;
-+ dmc->pages = pl;
-+
-+ spin_unlock(&dmc->lock);
-+}
-+
-+static int alloc_bio_pages(struct cache_c *dmc, unsigned int nr)
-+{
-+ unsigned int i;
-+ struct page_list *pl = NULL, *next;
-+
-+ for (i = 0; i < nr; i++) {
-+ next = alloc_pl();
-+ if (!next) {
-+ if (pl)
-+ drop_pages(pl);
-+ return -ENOMEM;
-+ }
-+ next->next = pl;
-+ pl = next;
-+ }
-+
-+ kcached_put_pages(dmc, pl);
-+ dmc->nr_pages += nr;
-+
-+ return 0;
-+}
-+
-+static void free_bio_pages(struct cache_c *dmc)
-+{
-+ BUG_ON(dmc->nr_free_pages != dmc->nr_pages);
-+ drop_pages(dmc->pages);
-+ dmc->pages = NULL;
-+ dmc->nr_free_pages = dmc->nr_pages = 0;
-+}
-+
-+/* Structure for a kcached job */
-+struct kcached_job {
-+ struct list_head list;
-+ struct cache_c *dmc;
-+ struct bio *bio; /* Original bio */
-+ struct io_region src;
-+ struct io_region dest;
-+ struct cacheblock *cacheblock;
-+ int rw;
-+ /*
-+ * When the original bio is not aligned with cache blocks,
-+ * we need extra bvecs and pages for padding.
-+ */
-+ struct bio_vec *bvec;
-+ unsigned int nr_pages;
-+ struct page_list *pages;
-+};
-+
-+static struct workqueue_struct *_kcached_wq;
-+static struct work_struct _kcached_work;
-+
-+static inline void wake(void)
-+{
-+ queue_work(_kcached_wq, &_kcached_work);
-+}
-+
-+#define MIN_JOBS 1024
-+
-+static struct kmem_cache *_job_cache;
-+static mempool_t *_job_pool;
-+
-+static DEFINE_SPINLOCK(_job_lock);
-+
-+static LIST_HEAD(_complete_jobs);
-+static LIST_HEAD(_io_jobs);
-+static LIST_HEAD(_pages_jobs);
-+
-+static int jobs_init(void)
-+{
-+ _job_cache = kmem_cache_create("kcached-jobs",
-+ sizeof(struct kcached_job),
-+ __alignof__(struct kcached_job),
-+ 0, NULL, NULL);
-+ if (!_job_cache)
-+ return -ENOMEM;
-+
-+ _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
-+ mempool_free_slab, _job_cache);
-+ if (!_job_pool) {
-+ kmem_cache_destroy(_job_cache);
-+ return -ENOMEM;
-+ }
-+
-+ return 0;
-+}
-+
-+static void jobs_exit(void)
-+{
-+ BUG_ON(!list_empty(&_complete_jobs));
-+ BUG_ON(!list_empty(&_io_jobs));
-+ BUG_ON(!list_empty(&_pages_jobs));
-+
-+ mempool_destroy(_job_pool);
-+ kmem_cache_destroy(_job_cache);
-+ _job_pool = NULL;
-+ _job_cache = NULL;
-+}
-+
-+/*
-+ * Functions to push and pop a job onto the head of a given job list.
-+ */
-+static inline struct kcached_job *pop(struct list_head *jobs)
-+{
-+ struct kcached_job *job = NULL;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&_job_lock, flags);
-+
-+ if (!list_empty(jobs)) {
-+ job = list_entry(jobs->next, struct kcached_job, list);
-+ list_del(&job->list);
-+ }
-+ spin_unlock_irqrestore(&_job_lock, flags);
-+
-+ return job;
-+}
-+
-+static inline void push(struct list_head *jobs, struct kcached_job *job)
-+{
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&_job_lock, flags);
-+ list_add_tail(&job->list, jobs);
-+ spin_unlock_irqrestore(&_job_lock, flags);
-+}
-+
-+
-+/****************************************************************************
-+ * Functions for asynchronously fetching data from source device and storing
-+ * data in cache device. Because the requested data may not align with the
-+ * cache blocks, extra handling is required to pad a block request and extract
-+ * the requested data from the results.
-+ ****************************************************************************/
-+
-+static void io_callback(unsigned long error, void *context)
-+{
-+ struct kcached_job *job = (struct kcached_job *) context;
-+
-+ if (error) {
-+ /* TODO */
-+ DMERR("io_callback: io error");
-+ return;
-+ }
-+
-+ if (job->rw == READ) {
-+ job->rw = WRITE;
-+ push(&_io_jobs, job);
-+ } else
-+ push(&_complete_jobs, job);
-+ wake();
-+}
-+
-+/*
-+ * Fetch data from the source device asynchronously.
-+ * For a READ bio, if a cache block is larger than the requested data, then
-+ * additional data are prefetched. Larger cache block size enables more
-+ * aggressive read prefetching, which is useful for read-mostly usage.
-+ * For a WRITE bio, if a cache block is larger than the requested data, the
-+ * entire block needs to be fetched, and larger block size incurs more overhead.
-+ * In scenarios where writes are frequent, 4KB is a good cache block size.
-+ */
-+static int do_fetch(struct kcached_job *job)
-+{
-+ int r = 0, i, j;
-+ struct bio *bio = job->bio;
-+ struct cache_c *dmc = job->dmc;
-+ unsigned int offset, head, tail, remaining, nr_vecs, idx = 0;
-+ struct bio_vec *bvec;
-+ struct page_list *pl;
-+
-+ offset = (unsigned int) (bio->bi_sector & dmc->block_mask);
-+ head = to_bytes(offset);
-+ tail = to_bytes(dmc->block_size) - bio->bi_size - head;
-+
-+ DPRINTK("do_fetch: %llu(%llu->%llu,%llu), head:%u,tail:%u",
-+ bio->bi_sector, job->src.sector, job->dest.sector,
-+ job->src.count, head, tail);
-+
-+ if (bio_data_dir(bio) == READ) { /* The original request is a READ */
-+ if (0 == job->nr_pages) { /* The request is aligned to cache block */
-+ r = dm_io_async_bvec(1, &job->src, READ,
-+ bio->bi_io_vec + bio->bi_idx,
-+ io_callback, job);
-+ return r;
-+ }
-+
-+ nr_vecs = bio->bi_vcnt - bio->bi_idx + job->nr_pages;
-+ bvec = kmalloc(nr_vecs * sizeof(*bvec), GFP_NOIO);
-+ if (!bvec) {
-+ DMERR("do_fetch: No memory");
-+ return 1;
-+ }
-+
-+ pl = job->pages;
-+ i = 0;
-+ while (head) {
-+ bvec[i].bv_len = min(head, (unsigned int)PAGE_SIZE);
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = pl->page;
-+ head -= bvec[i].bv_len;
-+ pl = pl->next;
-+ i++;
-+ }
-+
-+ remaining = bio->bi_size;
-+ j = bio->bi_idx;
-+ while (remaining) {
-+ bvec[i] = bio->bi_io_vec[j];
-+ remaining -= bvec[i].bv_len;
-+ i++; j++;
-+ }
-+
-+ while (tail) {
-+ bvec[i].bv_len = min(tail, (unsigned int)PAGE_SIZE);
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = pl->page;
-+ tail -= bvec[i].bv_len;
-+ pl = pl->next;
-+ i++;
-+ }
-+
-+ job->bvec = bvec;
-+ r = dm_io_async_bvec(1, &job->src, READ, job->bvec, io_callback, job);
-+ return r;
-+ } else { /* The original request is a WRITE */
-+ pl = job->pages;
-+
-+ if (head && tail) { /* Special case */
-+ bvec = kmalloc(job->nr_pages * sizeof(*bvec), GFP_KERNEL);
-+ if (!bvec) {
-+ DMERR("do_fetch: No memory");
-+ return 1;
-+ }
-+ for (i=0; i<job->nr_pages; i++) {
-+ bvec[i].bv_len = PAGE_SIZE;
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = pl->page;
-+ pl = pl->next;
-+ }
-+ job->bvec = bvec;
-+ r = dm_io_async_bvec(1, &job->src, READ, job->bvec,
-+ io_callback, job);
-+ return r;
-+ }
-+
-+ bvec = kmalloc((job->nr_pages + bio->bi_vcnt - bio->bi_idx)
-+ * sizeof(*bvec), GFP_KERNEL);
-+ if (!bvec) {
-+ DMERR("do_fetch: No memory");
-+ return 1;
-+ }
-+
-+ i = 0;
-+ while (head) {
-+ bvec[i].bv_len = min(head, (unsigned int)PAGE_SIZE);
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = pl->page;
-+ head -= bvec[i].bv_len;
-+ pl = pl->next;
-+ i++;
-+ }
-+
-+ remaining = bio->bi_size;
-+ j = bio->bi_idx;
-+ while (remaining) {
-+ bvec[i] = bio->bi_io_vec[j];
-+ remaining -= bvec[i].bv_len;
-+ i++; j++;
-+ }
-+
-+ if (tail) {
-+ idx = i;
-+ bvec[i].bv_offset = (to_bytes(offset) + bio->bi_size) &
-+ (PAGE_SIZE - 1);
-+ bvec[i].bv_len = PAGE_SIZE - bvec[i].bv_offset;
-+ bvec[i].bv_page = pl->page;
-+ tail -= bvec[i].bv_len;
-+ pl = pl->next; i++;
-+ while (tail) {
-+ bvec[i].bv_len = PAGE_SIZE;
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = pl->page;
-+ tail -= bvec[i].bv_len;
-+ pl = pl->next; i++;
-+ }
-+ }
-+
-+ job->bvec = bvec;
-+ r = dm_io_async_bvec(1, &job->src, READ, job->bvec + idx,
-+ io_callback, job);
-+
-+ return r;
-+ }
-+}
-+
-+/*
-+ * Store data to the cache source device asynchronously.
-+ * For a READ bio request, the data fetched from the source device are returned
-+ * to kernel and stored in cache at the same time.
-+ * For a WRITE bio request, the data are written to the cache and source device
-+ * at the same time.
-+ */
-+static int do_store(struct kcached_job *job)
-+{
-+ int i, j, r = 0;
-+ struct bio *bio = job->bio, *clone;
-+ struct cache_c *dmc = job->dmc;
-+ unsigned int offset, head, tail, remaining, nr_vecs;
-+ struct bio_vec *bvec;
-+
-+ offset = (unsigned int) (bio->bi_sector & dmc->block_mask);
-+ head = to_bytes(offset);
-+ tail = to_bytes(dmc->block_size) - bio->bi_size - head;
-+
-+ DPRINTK("do_store: %llu(%llu->%llu,%llu), head:%u,tail:%u",
-+ bio->bi_sector, job->src.sector, job->dest.sector,
-+ job->src.count, head, tail);
-+
-+ /* A READ is acknowledged as soon as the requested data is fetched, and
-+ does not have to wait for it being stored in cache. The bio is cloned
-+ so that the original one can be ended here. But to avoid copying
-+ pages, we reuse the pages allocated for the original bio, and mark
-+ each of them to prevent the pages being freed before the cache
-+ insertion is completed.
-+ */
-+ if (bio_data_dir(bio) == READ) {
-+ clone = bio_clone(bio, GFP_NOIO);
-+ for (i=bio->bi_idx; i<bio->bi_vcnt; i++) {
-+ get_page(bio->bi_io_vec[i].bv_page);
-+ }
-+ DPRINTK("bio ended for %llu:%u", bio->bi_sector, bio->bi_size);
-+ bio_endio(bio, bio->bi_size, 0);
-+ bio = clone;
-+ job->bio = clone;
-+ }
-+
-+ if (0 == job->nr_pages) /* Original request is aligned with cache blocks */
-+ r = dm_io_async_bvec(1, &job->dest, WRITE, bio->bi_io_vec + bio->bi_idx,
-+ io_callback, job);
-+ else {
-+ if (bio_data_dir(bio) == WRITE && head > 0 && tail > 0) {
-+ DPRINTK("Special case: %lu %u %u", bio_data_dir(bio), head, tail);
-+ nr_vecs = job->nr_pages + bio->bi_vcnt - bio->bi_idx;
-+ if (offset && (offset + bio->bi_size < PAGE_SIZE)) nr_vecs++;
-+ DPRINTK("Create %u new vecs", nr_vecs);
-+ bvec = kmalloc(nr_vecs * sizeof(*bvec), GFP_KERNEL);
-+ if (!bvec) {
-+ DMERR("do_store: No memory");
-+ return 1;
-+ }
-+
-+ i = 0;
-+ while (head) {
-+ bvec[i].bv_len = min(head, job->bvec[i].bv_len);
-+ bvec[i].bv_offset = 0;
-+ bvec[i].bv_page = job->bvec[i].bv_page;
-+ head -= bvec[i].bv_len;
-+ i++;
-+ }
-+ remaining = bio->bi_size;
-+ j = bio->bi_idx;
-+ while (remaining) {
-+ bvec[i] = bio->bi_io_vec[j];
-+ remaining -= bvec[i].bv_len;
-+ i++; j++;
-+ }
-+ j = (to_bytes(offset) + bio->bi_size) / PAGE_SIZE;
-+ bvec[i].bv_offset = (to_bytes(offset) + bio->bi_size) -
-+ j * PAGE_SIZE;
-+ bvec[i].bv_len = PAGE_SIZE - bvec[i].bv_offset;
-+ bvec[i].bv_page = job->bvec[j].bv_page;
-+ tail -= bvec[i].bv_len;
-+ i++; j++;
-+ while (tail) {
-+ bvec[i] = job->bvec[j];
-+ tail -= bvec[i].bv_len;
-+ i++; j++;
-+ }
-+ kfree(job->bvec);
-+ job->bvec = bvec;
<<Diff was trimmed, longer than 597 lines>>
---- CVS-web:
http://cvs.pld-linux.org/cgi-bin/cvsweb.cgi/SOURCES/linux-dmcache.patch?r1=1.1&r2=1.1.2.1&f=u