[packages/kernel] - added parallel dm-crypt patches (disabled by default)

lkrotowski lkrotowski at pld-linux.org
Thu Aug 1 18:00:10 CEST 2013


commit 101a744858c305f43487d5dd1610dc9d588b6877
Author: Łukasz Krotowski <lkrotowski at pld-linux.org>
Date:   Wed Jul 17 16:40:20 2013 +0200

    - added parallel dm-crypt patches (disabled by default)

 dm-crypt-dont-allocate-partial-pages.patch | 251 +++++++++++++++++++++++++++++
 dm-crypt-fix-allocation-deadlock.patch     | 111 +++++++++++++
 dm-crypt-offload-writes-to-thread.patch    | 232 ++++++++++++++++++++++++++
 dm-crypt-remove-percpu.patch               | 185 +++++++++++++++++++++
 dm-crypt-sort-requests.patch               | 137 ++++++++++++++++
 dm-crypt-unbound-workqueue.patch           |  27 ++++
 kernel.spec                                |  19 +++
 7 files changed, 962 insertions(+)
---
diff --git a/kernel.spec b/kernel.spec
index b998268..ad5c313 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -29,6 +29,7 @@
 %bcond_without	imq		# imq support
 %bcond_without	esfq		# esfq support
 %bcond_without	ipv6		# ipv6 support
+%bcond_with	padmcrypt	# parallel dm-crypt
 
 %bcond_without	vserver		# support for VServer (enabled by default)
 
@@ -225,6 +226,14 @@ Patch250:	kernel-fix_256colors_menuconfig.patch
 # https://patchwork.kernel.org/patch/236261/
 Patch400:	kernel-virtio-gl-accel.patch
 
+# http://people.redhat.com/mpatocka/patches/kernel/dm-crypt-paralelizace/current/series.html
+Patch500:	dm-crypt-remove-percpu.patch
+Patch501:	dm-crypt-unbound-workqueue.patch
+Patch502:	dm-crypt-dont-allocate-partial-pages.patch
+Patch503:	dm-crypt-fix-allocation-deadlock.patch
+Patch504:	dm-crypt-offload-writes-to-thread.patch
+Patch505:	dm-crypt-sort-requests.patch
+
 Patch2000:	kernel-small_fixes.patch
 Patch2001:	kernel-pwc-uncompress.patch
 Patch2003:	kernel-regressions.patch
@@ -703,6 +712,16 @@ cd linux-%{basever}
 # virtio-gl
 %patch400 -p1
 
+# parallel dm-crypt
+%if %{with padmcrypt}
+%patch500 -p1
+%patch501 -p1
+%patch502 -p1
+%patch503 -p1
+%patch504 -p1
+%patch505 -p1
+%endif
+
 %endif # vanilla
 
 # Small fixes:
diff --git a/dm-crypt-dont-allocate-partial-pages.patch b/dm-crypt-dont-allocate-partial-pages.patch
new file mode 100644
index 0000000..8354ab8
--- /dev/null
+++ b/dm-crypt-dont-allocate-partial-pages.patch
@@ -0,0 +1,251 @@
+dm-crypt: don't allocate pages for a partial request.
+
+This patch changes crypt_alloc_buffer so that it always allocates pages for
+a full request.
+
+This change enables further simplification and the removal of one refcount
+in the next patches.
+
+Note: the next patch is needed to fix a theoretical deadlock
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |  133 +++++++++-----------------------------------------
+ 1 file changed, 25 insertions(+), 108 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c	2013-05-15 21:47:30.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c	2013-05-15 22:49:13.000000000 +0200
+@@ -59,7 +59,6 @@ struct dm_crypt_io {
+ 	atomic_t io_pending;
+ 	int error;
+ 	sector_t sector;
+-	struct dm_crypt_io *base_io;
+ };
+ 
+ struct dm_crypt_request {
+@@ -162,7 +161,6 @@ struct crypt_config {
+ };
+ 
+ #define MIN_IOS        16
+-#define MIN_POOL_PAGES 32
+ 
+ static struct kmem_cache *_crypt_io_pool;
+ 
+@@ -777,14 +775,13 @@ static int crypt_convert(struct crypt_co
+ 	return 0;
+ }
+ 
++static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
++
+ /*
+  * Generate a new unfragmented bio with the given size
+  * This should never violate the device limitations
+- * May return a smaller bio when running out of pages, indicated by
+- * *out_of_pages set to 1.
+  */
+-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
+-				      unsigned *out_of_pages)
++static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
+ {
+ 	struct crypt_config *cc = io->cc;
+ 	struct bio *clone;
+@@ -798,37 +795,22 @@ static struct bio *crypt_alloc_buffer(st
+ 		return NULL;
+ 
+ 	clone_init(io, clone);
+-	*out_of_pages = 0;
+ 
+ 	for (i = 0; i < nr_iovecs; i++) {
+ 		page = mempool_alloc(cc->page_pool, gfp_mask);
+-		if (!page) {
+-			*out_of_pages = 1;
+-			break;
+-		}
+-
+-		/*
+-		 * If additional pages cannot be allocated without waiting,
+-		 * return a partially-allocated bio.  The caller will then try
+-		 * to allocate more bios while submitting this partial bio.
+-		 */
+-		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+ 
+ 		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+ 
+ 		if (!bio_add_page(clone, page, len, 0)) {
+ 			mempool_free(page, cc->page_pool);
+-			break;
++			crypt_free_buffer_pages(cc, clone);
++			bio_put(clone);
++			return NULL;
+ 		}
+ 
+ 		size -= len;
+ 	}
+ 
+-	if (!clone->bi_size) {
+-		bio_put(clone);
+-		return NULL;
+-	}
+-
+ 	return clone;
+ }
+ 
+@@ -855,7 +837,6 @@ static struct dm_crypt_io *crypt_io_allo
+ 	io->base_bio = bio;
+ 	io->sector = sector;
+ 	io->error = 0;
+-	io->base_io = NULL;
+ 	io->ctx.req = NULL;
+ 	atomic_set(&io->io_pending, 0);
+ 
+@@ -870,13 +851,11 @@ static void crypt_inc_pending(struct dm_
+ /*
+  * One of the bios was finished. Check for completion of
+  * the whole request and correctly clean up the buffer.
+- * If base_io is set, wait for the last fragment to complete.
+  */
+ static void crypt_dec_pending(struct dm_crypt_io *io)
+ {
+ 	struct crypt_config *cc = io->cc;
+ 	struct bio *base_bio = io->base_bio;
+-	struct dm_crypt_io *base_io = io->base_io;
+ 	int error = io->error;
+ 
+ 	if (!atomic_dec_and_test(&io->io_pending))
+@@ -886,13 +865,7 @@ static void crypt_dec_pending(struct dm_
+ 		mempool_free(io->ctx.req, cc->req_pool);
+ 	mempool_free(io, cc->io_pool);
+ 
+-	if (likely(!base_io))
+-		bio_endio(base_bio, error);
+-	else {
+-		if (error && !base_io->error)
+-			base_io->error = error;
+-		crypt_dec_pending(base_io);
+-	}
++	bio_endio(base_bio, error);
+ }
+ 
+ /*
+@@ -1030,10 +1003,7 @@ static void kcryptd_crypt_write_convert(
+ {
+ 	struct crypt_config *cc = io->cc;
+ 	struct bio *clone;
+-	struct dm_crypt_io *new_io;
+ 	int crypt_finished;
+-	unsigned out_of_pages = 0;
+-	unsigned remaining = io->base_bio->bi_size;
+ 	sector_t sector = io->sector;
+ 	int r;
+ 
+@@ -1043,81 +1013,28 @@ static void kcryptd_crypt_write_convert(
+ 	crypt_inc_pending(io);
+ 	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
+ 
+-	/*
+-	 * The allocated buffers can be smaller than the whole bio,
+-	 * so repeat the whole process until all the data can be handled.
+-	 */
+-	while (remaining) {
+-		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
+-		if (unlikely(!clone)) {
+-			io->error = -ENOMEM;
+-			break;
+-		}
+-
+-		io->ctx.bio_out = clone;
+-		io->ctx.idx_out = 0;
+-
+-		remaining -= clone->bi_size;
+-		sector += bio_sectors(clone);
+-
+-		crypt_inc_pending(io);
+-
+-		r = crypt_convert(cc, &io->ctx);
+-		if (r < 0)
+-			io->error = -EIO;
+-
+-		crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+-
+-		/* Encryption was already finished, submit io now */
+-		if (crypt_finished) {
+-			kcryptd_crypt_write_io_submit(io, 0);
+-
+-			/*
+-			 * If there was an error, do not try next fragments.
+-			 * For async, error is processed in async handler.
+-			 */
+-			if (unlikely(r < 0))
+-				break;
++	clone = crypt_alloc_buffer(io, io->base_bio->bi_size);
++	if (unlikely(!clone)) {
++		io->error = -ENOMEM;
++		goto dec;
++	}
+ 
+-			io->sector = sector;
+-		}
++	io->ctx.bio_out = clone;
++	io->ctx.idx_out = 0;
+ 
+-		/*
+-		 * Out of memory -> run queues
+-		 * But don't wait if split was due to the io size restriction
+-		 */
+-		if (unlikely(out_of_pages))
+-			congestion_wait(BLK_RW_ASYNC, HZ/100);
++	sector += bio_sectors(clone);
+ 
+-		/*
+-		 * With async crypto it is unsafe to share the crypto context
+-		 * between fragments, so switch to a new dm_crypt_io structure.
+-		 */
+-		if (unlikely(!crypt_finished && remaining)) {
+-			new_io = crypt_io_alloc(io->cc, io->base_bio,
+-						sector);
+-			crypt_inc_pending(new_io);
+-			crypt_convert_init(cc, &new_io->ctx, NULL,
+-					   io->base_bio, sector);
+-			new_io->ctx.idx_in = io->ctx.idx_in;
+-			new_io->ctx.offset_in = io->ctx.offset_in;
+-
+-			/*
+-			 * Fragments after the first use the base_io
+-			 * pending count.
+-			 */
+-			if (!io->base_io)
+-				new_io->base_io = io;
+-			else {
+-				new_io->base_io = io->base_io;
+-				crypt_inc_pending(io->base_io);
+-				crypt_dec_pending(io);
+-			}
++	crypt_inc_pending(io);
++	r = crypt_convert(cc, &io->ctx);
++	if (r)
++		io->error = -EIO;
++	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+ 
+-			io = new_io;
+-		}
+-	}
++	/* Encryption was already finished, submit io now */
++	if (crypt_finished)
++		kcryptd_crypt_write_io_submit(io, 0);
+ 
++dec:
+ 	crypt_dec_pending(io);
+ }
+ 
+@@ -1556,7 +1473,7 @@ static int crypt_ctr(struct dm_target *t
+ 		goto bad;
+ 	}
+ 
+-	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
++	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
+ 	if (!cc->page_pool) {
+ 		ti->error = "Cannot allocate page mempool";
+ 		goto bad;
diff --git a/dm-crypt-fix-allocation-deadlock.patch b/dm-crypt-fix-allocation-deadlock.patch
new file mode 100644
index 0000000..8f439da
--- /dev/null
+++ b/dm-crypt-fix-allocation-deadlock.patch
@@ -0,0 +1,111 @@
+dm-crypt: avoid deadlock in mempools
+
+This patch fixes a theoretical deadlock introduced in the previous patch.
+
+The function crypt_alloc_buffer may be called concurrently. If we allocate
+from the mempool concurrently, there is a possibility of deadlock.
+For example, if we have a mempool of 256 pages and two processes, each wanting
+256 pages, allocate from the mempool concurrently, they may deadlock in a
+situation where both have allocated 128 pages and the mempool is exhausted.
+
+In order to avoid this scenario, we allocate the pages under a mutex.
+
+In order not to degrade performance with excessive locking, we first try a
+non-blocking allocation without the mutex and, if that fails, fall back
+to a blocking allocation under the mutex.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |   36 +++++++++++++++++++++++++++++++++---
+ 1 file changed, 33 insertions(+), 3 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c	2013-05-15 22:49:13.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c	2013-05-15 22:49:53.000000000 +0200
+@@ -118,6 +118,7 @@ struct crypt_config {
+ 	mempool_t *req_pool;
+ 	mempool_t *page_pool;
+ 	struct bio_set *bs;
++	struct mutex bio_alloc_lock;
+ 
+ 	struct workqueue_struct *io_queue;
+ 	struct workqueue_struct *crypt_queue;
+@@ -780,24 +781,46 @@ static void crypt_free_buffer_pages(stru
+ /*
+  * Generate a new unfragmented bio with the given size
+  * This should never violate the device limitations
++ *
++ * This function may be called concurrently. If we allocate from the mempool
++ * concurrently, there is a possibility of deadlock. For example, if we have a
++ * mempool of 256 pages and two processes, each wanting 256 pages, allocate from
++ * the mempool concurrently, they may deadlock in a situation where both processes
++ * have allocated 128 pages and the mempool is exhausted.
++ *
++ * In order to avoid this scenario, we allocate the pages under a mutex.
++ *
++ * In order not to degrade performance with excessive locking, we first try a
++ * non-blocking allocation without the mutex and, if that fails, fall back
++ * to a blocking allocation under the mutex.
+  */
+ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
+ {
+ 	struct crypt_config *cc = io->cc;
+ 	struct bio *clone;
+ 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+-	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
++	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+ 	unsigned i, len;
+ 	struct page *page;
+ 
++retry:
++	if (unlikely(gfp_mask & __GFP_WAIT))
++		mutex_lock(&cc->bio_alloc_lock);
++
+ 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
+ 	if (!clone)
+-		return NULL;
++		goto return_clone;
+ 
+ 	clone_init(io, clone);
+ 
+ 	for (i = 0; i < nr_iovecs; i++) {
+ 		page = mempool_alloc(cc->page_pool, gfp_mask);
++		if (!page) {
++			crypt_free_buffer_pages(cc, clone);
++			bio_put(clone);
++			gfp_mask |= __GFP_WAIT;
++			goto retry;
++		}
+ 
+ 		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+ 
+@@ -805,12 +828,17 @@ static struct bio *crypt_alloc_buffer(st
+ 			mempool_free(page, cc->page_pool);
+ 			crypt_free_buffer_pages(cc, clone);
+ 			bio_put(clone);
+-			return NULL;
++			clone = NULL;
++			goto return_clone;
+ 		}
+ 
+ 		size -= len;
+ 	}
+ 
++return_clone:
++	if (unlikely(gfp_mask & __GFP_WAIT))
++		mutex_unlock(&cc->bio_alloc_lock);
++
+ 	return clone;
+ }
+ 
+@@ -1485,6 +1513,8 @@ static int crypt_ctr(struct dm_target *t
+ 		goto bad;
+ 	}
+ 
++	mutex_init(&cc->bio_alloc_lock);
++
+ 	ret = -EINVAL;
+ 	if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
+ 		ti->error = "Invalid iv_offset sector";
diff --git a/dm-crypt-offload-writes-to-thread.patch b/dm-crypt-offload-writes-to-thread.patch
new file mode 100644
index 0000000..da2e68f
--- /dev/null
+++ b/dm-crypt-offload-writes-to-thread.patch
@@ -0,0 +1,232 @@
+dm-crypt: offload writes to thread
+
+Submitting write bios directly from the encryption thread caused serious
+performance degradation. On a multiprocessor machine, encryption requests
+finish in a different order than the one in which they were submitted.
+Consequently, write requests would also be issued out of order, which could
+cause severe performance degradation.
+
+This patch moves submitting write requests to a separate thread so that
+the requests can be sorted before submitting.
+
+Sorting is implemented in the next patch.
+
+Note: the earlier patch "dm-crypt: don't allocate pages for a partial
+request" must be applied before this one. Without it, this patch could
+introduce a crash.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |  120 ++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 97 insertions(+), 23 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c	2013-05-15 22:49:53.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c	2013-05-15 22:49:57.000000000 +0200
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/crypto.h>
+ #include <linux/workqueue.h>
++#include <linux/kthread.h>
+ #include <linux/backing-dev.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
+@@ -59,6 +60,8 @@ struct dm_crypt_io {
+ 	atomic_t io_pending;
+ 	int error;
+ 	sector_t sector;
++
++	struct list_head list;
+ };
+ 
+ struct dm_crypt_request {
+@@ -123,6 +126,10 @@ struct crypt_config {
+ 	struct workqueue_struct *io_queue;
+ 	struct workqueue_struct *crypt_queue;
+ 
++	struct task_struct *write_thread;
++	wait_queue_head_t write_thread_wait;
++	struct list_head write_thread_list;
++
+ 	char *cipher;
+ 	char *cipher_string;
+ 
+@@ -977,37 +984,89 @@ static int kcryptd_io_read(struct dm_cry
+ 	return 0;
+ }
+ 
++static void kcryptd_io_read_work(struct work_struct *work)
++{
++	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++
++	crypt_inc_pending(io);
++	if (kcryptd_io_read(io, GFP_NOIO))
++		io->error = -ENOMEM;
++	crypt_dec_pending(io);
++}
++
++static void kcryptd_queue_read(struct dm_crypt_io *io)
++{
++	struct crypt_config *cc = io->cc;
++
++	INIT_WORK(&io->work, kcryptd_io_read_work);
++	queue_work(cc->io_queue, &io->work);
++}
++
+ static void kcryptd_io_write(struct dm_crypt_io *io)
+ {
+ 	struct bio *clone = io->ctx.bio_out;
++
+ 	generic_make_request(clone);
+ }
+ 
+-static void kcryptd_io(struct work_struct *work)
++static int dmcrypt_write(void *data)
+ {
+-	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++	struct crypt_config *cc = data;
++	while (1) {
++		struct list_head local_list;
++		struct blk_plug plug;
+ 
+-	if (bio_data_dir(io->base_bio) == READ) {
+-		crypt_inc_pending(io);
+-		if (kcryptd_io_read(io, GFP_NOIO))
+-			io->error = -ENOMEM;
+-		crypt_dec_pending(io);
+-	} else
+-		kcryptd_io_write(io);
+-}
++		DECLARE_WAITQUEUE(wait, current);
+ 
+-static void kcryptd_queue_io(struct dm_crypt_io *io)
+-{
+-	struct crypt_config *cc = io->cc;
++		spin_lock_irq(&cc->write_thread_wait.lock);
++continue_locked:
+ 
+-	INIT_WORK(&io->work, kcryptd_io);
+-	queue_work(cc->io_queue, &io->work);
++		if (!list_empty(&cc->write_thread_list))
++			goto pop_from_list;
++
++		__set_current_state(TASK_INTERRUPTIBLE);
++		__add_wait_queue(&cc->write_thread_wait, &wait);
++
++		spin_unlock_irq(&cc->write_thread_wait.lock);
++
++		if (unlikely(kthread_should_stop())) {
++			set_task_state(current, TASK_RUNNING);
++			remove_wait_queue(&cc->write_thread_wait, &wait);
++			break;
++		}
++
++		schedule();
++
++		set_task_state(current, TASK_RUNNING);
++		spin_lock_irq(&cc->write_thread_wait.lock);
++		__remove_wait_queue(&cc->write_thread_wait, &wait);
++		goto continue_locked;
++
++pop_from_list:
++		local_list = cc->write_thread_list;
++		local_list.next->prev = &local_list;
++		local_list.prev->next = &local_list;
++		INIT_LIST_HEAD(&cc->write_thread_list);
++
++		spin_unlock_irq(&cc->write_thread_wait.lock);
++
++		blk_start_plug(&plug);
++		do {
++			struct dm_crypt_io *io = container_of(local_list.next,
++						struct dm_crypt_io, list);
++			list_del(&io->list);
++			kcryptd_io_write(io);
++		} while (!list_empty(&local_list));
++		blk_finish_plug(&plug);
++	}
++	return 0;
+ }
+ 
+-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
++static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io)
+ {
+ 	struct bio *clone = io->ctx.bio_out;
+ 	struct crypt_config *cc = io->cc;
++	unsigned long flags;
+ 
+ 	if (unlikely(io->error < 0)) {
+ 		crypt_free_buffer_pages(cc, clone);
+@@ -1021,10 +1080,10 @@ static void kcryptd_crypt_write_io_submi
+ 
+ 	clone->bi_sector = cc->start + io->sector;
+ 
+-	if (async)
+-		kcryptd_queue_io(io);
+-	else
+-		generic_make_request(clone);
++	spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
++	list_add_tail(&io->list, &cc->write_thread_list);
++	wake_up_locked(&cc->write_thread_wait);
++	spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+ }
+ 
+ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
+@@ -1060,7 +1119,7 @@ static void kcryptd_crypt_write_convert(
+ 
+ 	/* Encryption was already finished, submit io now */
+ 	if (crypt_finished)
+-		kcryptd_crypt_write_io_submit(io, 0);
++		kcryptd_crypt_write_io_submit(io);
+ 
+ dec:
+ 	crypt_dec_pending(io);
+@@ -1118,7 +1177,7 @@ static void kcryptd_async_done(struct cr
+ 	if (bio_data_dir(io->base_bio) == READ)
+ 		kcryptd_crypt_read_done(io);
+ 	else
+-		kcryptd_crypt_write_io_submit(io, 1);
++		kcryptd_crypt_write_io_submit(io);
+ }
+ 
+ static void kcryptd_crypt(struct work_struct *work)
+@@ -1262,6 +1321,9 @@ static void crypt_dtr(struct dm_target *
+ 	if (!cc)
+ 		return;
+ 
++	if (cc->write_thread)
++		kthread_stop(cc->write_thread);
++
+ 	if (cc->io_queue)
+ 		destroy_workqueue(cc->io_queue);
+ 	if (cc->crypt_queue)
+@@ -1578,6 +1640,18 @@ static int crypt_ctr(struct dm_target *t
+ 		goto bad;
+ 	}
+ 
++	init_waitqueue_head(&cc->write_thread_wait);
++	INIT_LIST_HEAD(&cc->write_thread_list);
++
++	cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
++	if (IS_ERR(cc->write_thread)) {
++		ret = PTR_ERR(cc->write_thread);
++		cc->write_thread = NULL;
++		ti->error = "Couldn't spawn write thread";
++		goto bad;
++	}
++	wake_up_process(cc->write_thread);
++
+ 	ti->num_flush_bios = 1;
+ 	ti->discard_zeroes_data_unsupported = true;
+ 
+@@ -1611,7 +1685,7 @@ static int crypt_map(struct dm_target *t
+ 
+ 	if (bio_data_dir(io->base_bio) == READ) {
+ 		if (kcryptd_io_read(io, GFP_NOWAIT))
+-			kcryptd_queue_io(io);
++			kcryptd_queue_read(io);
+ 	} else
+ 		kcryptd_queue_crypt(io);
+ 
diff --git a/dm-crypt-remove-percpu.patch b/dm-crypt-remove-percpu.patch
new file mode 100644
index 0000000..2467276
--- /dev/null
+++ b/dm-crypt-remove-percpu.patch
@@ -0,0 +1,185 @@
+dm-crypt: remove per-cpu structure
+
+Remove the per-cpu structure and make it per-convert_context instead.
+This allows moving requests between different cpus.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |   61 +++++++++-----------------------------------------
+ 1 file changed, 12 insertions(+), 49 deletions(-)
+
+Index: linux-3.8.6-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c	2013-04-11 17:29:10.000000000 +0200
++++ linux-3.8.6-fast/drivers/md/dm-crypt.c	2013-04-11 17:29:10.000000000 +0200
+@@ -18,7 +18,6 @@
+ #include <linux/crypto.h>
+ #include <linux/workqueue.h>
+ #include <linux/backing-dev.h>
+-#include <linux/percpu.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
+ #include <asm/page.h>
+@@ -44,6 +43,7 @@ struct convert_context {
+ 	unsigned int idx_out;
+ 	sector_t cc_sector;
+ 	atomic_t cc_pending;
++	struct ablkcipher_request *req;
+ };
+ 
+ /*
+@@ -105,15 +105,7 @@ struct iv_lmk_private {
+ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+ 
+ /*
+- * Duplicated per-CPU state for cipher.
+- */
+-struct crypt_cpu {
+-	struct ablkcipher_request *req;
+-};
+-
+-/*
+- * The fields in here must be read only after initialization,
+- * changing state should be in crypt_cpu.
++ * The fields in here must be read only after initialization.
+  */
+ struct crypt_config {
+ 	struct dm_dev *dev;
+@@ -143,12 +135,6 @@ struct crypt_config {
+ 	sector_t iv_offset;
+ 	unsigned int iv_size;
+ 
+-	/*
+-	 * Duplicated per cpu state. Access through
+-	 * per_cpu_ptr() only.
+-	 */
+-	struct crypt_cpu __percpu *cpu;
+-
+ 	/* ESSIV: struct crypto_cipher *essiv_tfm */
+ 	void *iv_private;
+ 	struct crypto_ablkcipher **tfms;
+@@ -184,11 +170,6 @@ static void clone_init(struct dm_crypt_i
+ static void kcryptd_queue_crypt(struct dm_crypt_io *io);
+ static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
+ 
+-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
+-{
+-	return this_cpu_ptr(cc->cpu);
+-}
+-
+ /*
+  * Use this to access cipher attributes that are the same for each CPU.
+  */
+@@ -738,16 +719,15 @@ static void kcryptd_async_done(struct cr
+ static void crypt_alloc_req(struct crypt_config *cc,
+ 			    struct convert_context *ctx)
+ {
+-	struct crypt_cpu *this_cc = this_crypt_config(cc);
+ 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
+ 
+-	if (!this_cc->req)
+-		this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
++	if (!ctx->req)
++		ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+ 
+-	ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]);
+-	ablkcipher_request_set_callback(this_cc->req,
++	ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
++	ablkcipher_request_set_callback(ctx->req,
+ 	    CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+-	    kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
++	    kcryptd_async_done, dmreq_of_req(cc, ctx->req));
+ }
+ 
+ /*
+@@ -756,7 +736,6 @@ static void crypt_alloc_req(struct crypt
+ static int crypt_convert(struct crypt_config *cc,
+ 			 struct convert_context *ctx)
+ {
+-	struct crypt_cpu *this_cc = this_crypt_config(cc);
+ 	int r;
+ 
+ 	atomic_set(&ctx->cc_pending, 1);
+@@ -768,7 +747,7 @@ static int crypt_convert(struct crypt_co
+ 
+ 		atomic_inc(&ctx->cc_pending);
+ 
+-		r = crypt_convert_block(cc, ctx, this_cc->req);
++		r = crypt_convert_block(cc, ctx, ctx->req);
+ 
+ 		switch (r) {
+ 		/* async */
+@@ -777,7 +756,7 @@ static int crypt_convert(struct crypt_co
+ 			INIT_COMPLETION(ctx->restart);
+ 			/* fall through*/
+ 		case -EINPROGRESS:
+-			this_cc->req = NULL;
++			ctx->req = NULL;
+ 			ctx->cc_sector++;
+ 			continue;
+ 
+@@ -877,6 +856,7 @@ static struct dm_crypt_io *crypt_io_allo
+ 	io->sector = sector;
+ 	io->error = 0;
+ 	io->base_io = NULL;
++	io->ctx.req = NULL;
+ 	atomic_set(&io->io_pending, 0);
+ 
+ 	return io;
+@@ -902,6 +882,8 @@ static void crypt_dec_pending(struct dm_
+ 	if (!atomic_dec_and_test(&io->io_pending))
+ 		return;
+ 
++	if (io->ctx.req)
++		mempool_free(io->ctx.req, cc->req_pool);
+ 	mempool_free(io, cc->io_pool);
+ 
+ 	if (likely(!base_io))
+@@ -1329,8 +1311,6 @@ static int crypt_wipe_key(struct crypt_c
+ static void crypt_dtr(struct dm_target *ti)
+ {
+ 	struct crypt_config *cc = ti->private;
+-	struct crypt_cpu *cpu_cc;
+-	int cpu;
+ 
+ 	ti->private = NULL;
+ 
+@@ -1342,13 +1322,6 @@ static void crypt_dtr(struct dm_target *
+ 	if (cc->crypt_queue)
+ 		destroy_workqueue(cc->crypt_queue);
+ 
+-	if (cc->cpu)
+-		for_each_possible_cpu(cpu) {
+-			cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+-			if (cpu_cc->req)
+-				mempool_free(cpu_cc->req, cc->req_pool);
+-		}
+-
+ 	crypt_free_tfms(cc);
+ 
+ 	if (cc->bs)
+@@ -1367,9 +1340,6 @@ static void crypt_dtr(struct dm_target *
+ 	if (cc->dev)
+ 		dm_put_device(ti, cc->dev);
+ 
+-	if (cc->cpu)
+-		free_percpu(cc->cpu);
+-
+ 	kzfree(cc->cipher);
+ 	kzfree(cc->cipher_string);
+ 
+@@ -1424,13 +1394,6 @@ static int crypt_ctr_cipher(struct dm_ta
+ 	if (tmp)
+ 		DMWARN("Ignoring unexpected additional cipher options");
+ 
+-	cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)),
+-				 __alignof__(struct crypt_cpu));
+-	if (!cc->cpu) {
+-		ti->error = "Cannot allocate per cpu state";
+-		goto bad_mem;
+-	}
+-
+ 	/*
+ 	 * For compatibility with the original dm-crypt mapping format, if
+ 	 * only the cipher name is supplied, use cbc-plain.
diff --git a/dm-crypt-sort-requests.patch b/dm-crypt-sort-requests.patch
new file mode 100644
index 0000000..90bfbae
--- /dev/null
+++ b/dm-crypt-sort-requests.patch
@@ -0,0 +1,137 @@
+dm-crypt: sort writes
+
+Write requests are sorted in a red-black tree structure and are submitted
+in the sorted order.
+
+In theory the sorting should be performed by the underlying disk scheduler;
+in practice, however, the disk scheduler accepts and sorts only 128 requests.
+In order to sort more requests, we need to implement our own sorting.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |   50 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 35 insertions(+), 15 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c	2013-05-15 22:49:57.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c	2013-05-15 22:50:01.000000000 +0200
+@@ -21,6 +21,7 @@
+ #include <linux/backing-dev.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
++#include <linux/rbtree.h>
+ #include <asm/page.h>
+ #include <asm/unaligned.h>
+ #include <crypto/hash.h>
+@@ -61,7 +62,7 @@ struct dm_crypt_io {
+ 	int error;
+ 	sector_t sector;
+ 
+-	struct list_head list;
++	struct rb_node rb_node;
+ };
+ 
+ struct dm_crypt_request {
+@@ -128,7 +129,7 @@ struct crypt_config {
+ 
+ 	struct task_struct *write_thread;
+ 	wait_queue_head_t write_thread_wait;
+-	struct list_head write_thread_list;
++	struct rb_root write_tree;
+ 
+ 	char *cipher;
+ 	char *cipher_string;
+@@ -1013,7 +1014,7 @@ static int dmcrypt_write(void *data)
+ {
+ 	struct crypt_config *cc = data;
+ 	while (1) {
+-		struct list_head local_list;
++		struct rb_root write_tree;
+ 		struct blk_plug plug;
+ 
+ 		DECLARE_WAITQUEUE(wait, current);
+@@ -1021,7 +1022,7 @@ static int dmcrypt_write(void *data)
+ 		spin_lock_irq(&cc->write_thread_wait.lock);
+ continue_locked:
+ 
+-		if (!list_empty(&cc->write_thread_list))
++		if (!RB_EMPTY_ROOT(&cc->write_tree))
+ 			goto pop_from_list;
+ 
+ 		__set_current_state(TASK_INTERRUPTIBLE);
+@@ -1043,20 +1044,23 @@ continue_locked:
+ 		goto continue_locked;
+ 
+ pop_from_list:
+-		local_list = cc->write_thread_list;
+-		local_list.next->prev = &local_list;
+-		local_list.prev->next = &local_list;
+-		INIT_LIST_HEAD(&cc->write_thread_list);
+-
++		write_tree = cc->write_tree;
++		cc->write_tree = RB_ROOT;
+ 		spin_unlock_irq(&cc->write_thread_wait.lock);
+ 
++		BUG_ON(rb_parent(write_tree.rb_node));
++
++		/*
++		 * Note: we cannot walk the tree here with rb_next because
++		 * the structures may be freed when kcryptd_io_write is called.
++		 */
+ 		blk_start_plug(&plug);
+ 		do {
+-			struct dm_crypt_io *io = container_of(local_list.next,
+-						struct dm_crypt_io, list);
+-			list_del(&io->list);
++			struct dm_crypt_io *io = rb_entry(rb_first(&write_tree),
++						struct dm_crypt_io, rb_node);
++			rb_erase(&io->rb_node, &write_tree);
+ 			kcryptd_io_write(io);
+-		} while (!list_empty(&local_list));
++		} while (!RB_EMPTY_ROOT(&write_tree));
+ 		blk_finish_plug(&plug);
+ 	}
+ 	return 0;
+@@ -1067,6 +1071,8 @@ static void kcryptd_crypt_write_io_submi
+ 	struct bio *clone = io->ctx.bio_out;
+ 	struct crypt_config *cc = io->cc;
+ 	unsigned long flags;
++	sector_t sector;
++	struct rb_node **p, *parent;
+ 
+ 	if (unlikely(io->error < 0)) {
+ 		crypt_free_buffer_pages(cc, clone);
+@@ -1081,7 +1087,21 @@ static void kcryptd_crypt_write_io_submi
+ 	clone->bi_sector = cc->start + io->sector;
+ 
+ 	spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+-	list_add_tail(&io->list, &cc->write_thread_list);
++	p = &cc->write_tree.rb_node;
++	parent = NULL;
++	sector = io->sector;
++	while (*p) {
++		parent = *p;
++#define io_node rb_entry(parent, struct dm_crypt_io, rb_node)
++		if (sector < io_node->sector)
++			p = &io_node->rb_node.rb_left;
++		else
++			p = &io_node->rb_node.rb_right;
++#undef io_node
++	}
++	rb_link_node(&io->rb_node, parent, p);
++	rb_insert_color(&io->rb_node, &cc->write_tree);
++
+ 	wake_up_locked(&cc->write_thread_wait);
+ 	spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+ }
+@@ -1641,7 +1661,7 @@ static int crypt_ctr(struct dm_target *t
+ 	}
+ 
+ 	init_waitqueue_head(&cc->write_thread_wait);
+-	INIT_LIST_HEAD(&cc->write_thread_list);
++	cc->write_tree = RB_ROOT;
+ 
+ 	cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+ 	if (IS_ERR(cc->write_thread)) {
diff --git a/dm-crypt-unbound-workqueue.patch b/dm-crypt-unbound-workqueue.patch
new file mode 100644
index 0000000..0433569
--- /dev/null
+++ b/dm-crypt-unbound-workqueue.patch
@@ -0,0 +1,27 @@
+dm-crypt: use unbound workqueue for request processing
+
+Use an unbound workqueue so that work is automatically balanced between
+available CPUs.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+Index: linux-3.8.6-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c	2013-04-11 17:29:10.000000000 +0200
++++ linux-3.8.6-fast/drivers/md/dm-crypt.c	2013-04-11 17:29:13.000000000 +0200
+@@ -1623,8 +1623,9 @@ static int crypt_ctr(struct dm_target *t
+ 	cc->crypt_queue = alloc_workqueue("kcryptd",
+ 					  WQ_NON_REENTRANT|
+ 					  WQ_CPU_INTENSIVE|
+-					  WQ_MEM_RECLAIM,
+-					  1);
++					  WQ_MEM_RECLAIM|
++					  WQ_UNBOUND,
++					  num_online_cpus());
+ 	if (!cc->crypt_queue) {
+ 		ti->error = "Couldn't create kcryptd queue";
+ 		goto bad;
================================================================
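
For anyone rebuilding from this spec: the series stays inactive until the
padmcrypt bcond is switched on at build time. A minimal sketch of such a
build using plain rpmbuild (command line illustrative only):

  # Rebuild the kernel package with the parallel dm-crypt series applied;
  # --with padmcrypt flips the %bcond_with padmcrypt conditional in kernel.spec,
  # so %patch500..%patch505 are applied during %prep.
  rpmbuild -ba kernel.spec --with padmcrypt

Without the switch the six %patch lines are skipped and the build behaves
exactly like a stock kernel.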

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/101a744858c305f43487d5dd1610dc9d588b6877
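
A quick runtime sanity check, assuming a kernel built with the series: the
offload-writes patch creates one "dmcrypt_write" kernel thread per active
crypt target, so it should be visible once a mapping is open (device name
below is only an example):

  # Open any LUKS/dm-crypt mapping, then look for the per-target writer thread
  # added by dm-crypt-offload-writes-to-thread.patch.
  cryptsetup luksOpen /dev/sdX2 testvol
  ps -eo comm | grep dmcrypt_write

On a kernel without the series (or with the bcond left off) no such thread
exists.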


