[packages/kernel] - added parallel dm-crypt patches (disabled by default)
lkrotowski
lkrotowski at pld-linux.org
Thu Aug 1 18:00:10 CEST 2013
commit 101a744858c305f43487d5dd1610dc9d588b6877
Author: Łukasz Krotowski <lkrotowski at pld-linux.org>
Date: Wed Jul 17 16:40:20 2013 +0200
- added parallel dm-crypt patches (disabled by default)
dm-crypt-dont-allocate-partial-pages.patch | 251 +++++++++++++++++++++++++++++
dm-crypt-fix-allocation-deadlock.patch | 111 +++++++++++++
dm-crypt-offload-writes-to-thread.patch | 232 ++++++++++++++++++++++++++
dm-crypt-remove-percpu.patch | 185 +++++++++++++++++++++
dm-crypt-sort-requests.patch | 137 ++++++++++++++++
dm-crypt-unbound-workqueue.patch | 27 ++++
kernel.spec | 19 +++
7 files changed, 962 insertions(+)
---
diff --git a/kernel.spec b/kernel.spec
index b998268..ad5c313 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -29,6 +29,7 @@
%bcond_without imq # imq support
%bcond_without esfq # esfq support
%bcond_without ipv6 # ipv6 support
+%bcond_with padmcrypt # parallel dm-crypt
%bcond_without vserver # support for VServer (enabled by default)
@@ -225,6 +226,14 @@ Patch250: kernel-fix_256colors_menuconfig.patch
# https://patchwork.kernel.org/patch/236261/
Patch400: kernel-virtio-gl-accel.patch
+# http://people.redhat.com/mpatocka/patches/kernel/dm-crypt-paralelizace/current/series.html
+Patch500: dm-crypt-remove-percpu.patch
+Patch501: dm-crypt-unbound-workqueue.patch
+Patch502: dm-crypt-dont-allocate-partial-pages.patch
+Patch503: dm-crypt-fix-allocation-deadlock.patch
+Patch504: dm-crypt-offload-writes-to-thread.patch
+Patch505: dm-crypt-sort-requests.patch
+
Patch2000: kernel-small_fixes.patch
Patch2001: kernel-pwc-uncompress.patch
Patch2003: kernel-regressions.patch
@@ -703,6 +712,16 @@ cd linux-%{basever}
# virtio-gl
%patch400 -p1
+# parallel dm-crypt
+%if %{with padmcrypt}
+%patch500 -p1
+%patch501 -p1
+%patch502 -p1
+%patch503 -p1
+%patch504 -p1
+%patch505 -p1
+%endif
+
%endif # vanilla
# Small fixes:
diff --git a/dm-crypt-dont-allocate-partial-pages.patch b/dm-crypt-dont-allocate-partial-pages.patch
new file mode 100644
index 0000000..8354ab8
--- /dev/null
+++ b/dm-crypt-dont-allocate-partial-pages.patch
@@ -0,0 +1,251 @@
+dm-crypt: don't allocate pages for a partial request.
+
+This patch changes crypt_alloc_buffer so that it always allocates pages for
+a full request.
+
+This change enables further simplification and the removal of one refcount
+in the next patches.
+
+Note: the next patch is needed to fix a theoretical deadlock
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 133 +++++++++-----------------------------------------
+ 1 file changed, 25 insertions(+), 108 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 21:47:30.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:13.000000000 +0200
+@@ -59,7 +59,6 @@ struct dm_crypt_io {
+ atomic_t io_pending;
+ int error;
+ sector_t sector;
+- struct dm_crypt_io *base_io;
+ };
+
+ struct dm_crypt_request {
+@@ -162,7 +161,6 @@ struct crypt_config {
+ };
+
+ #define MIN_IOS 16
+-#define MIN_POOL_PAGES 32
+
+ static struct kmem_cache *_crypt_io_pool;
+
+@@ -777,14 +775,13 @@ static int crypt_convert(struct crypt_co
+ return 0;
+ }
+
++static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
++
+ /*
+ * Generate a new unfragmented bio with the given size
+ * This should never violate the device limitations
+- * May return a smaller bio when running out of pages, indicated by
+- * *out_of_pages set to 1.
+ */
+-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
+- unsigned *out_of_pages)
++static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
+ {
+ struct crypt_config *cc = io->cc;
+ struct bio *clone;
+@@ -798,37 +795,22 @@ static struct bio *crypt_alloc_buffer(st
+ return NULL;
+
+ clone_init(io, clone);
+- *out_of_pages = 0;
+
+ for (i = 0; i < nr_iovecs; i++) {
+ page = mempool_alloc(cc->page_pool, gfp_mask);
+- if (!page) {
+- *out_of_pages = 1;
+- break;
+- }
+-
+- /*
+- * If additional pages cannot be allocated without waiting,
+- * return a partially-allocated bio. The caller will then try
+- * to allocate more bios while submitting this partial bio.
+- */
+- gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+
+ len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+
+ if (!bio_add_page(clone, page, len, 0)) {
+ mempool_free(page, cc->page_pool);
+- break;
++ crypt_free_buffer_pages(cc, clone);
++ bio_put(clone);
++ return NULL;
+ }
+
+ size -= len;
+ }
+
+- if (!clone->bi_size) {
+- bio_put(clone);
+- return NULL;
+- }
+-
+ return clone;
+ }
+
+@@ -855,7 +837,6 @@ static struct dm_crypt_io *crypt_io_allo
+ io->base_bio = bio;
+ io->sector = sector;
+ io->error = 0;
+- io->base_io = NULL;
+ io->ctx.req = NULL;
+ atomic_set(&io->io_pending, 0);
+
+@@ -870,13 +851,11 @@ static void crypt_inc_pending(struct dm_
+ /*
+ * One of the bios was finished. Check for completion of
+ * the whole request and correctly clean up the buffer.
+- * If base_io is set, wait for the last fragment to complete.
+ */
+ static void crypt_dec_pending(struct dm_crypt_io *io)
+ {
+ struct crypt_config *cc = io->cc;
+ struct bio *base_bio = io->base_bio;
+- struct dm_crypt_io *base_io = io->base_io;
+ int error = io->error;
+
+ if (!atomic_dec_and_test(&io->io_pending))
+@@ -886,13 +865,7 @@ static void crypt_dec_pending(struct dm_
+ mempool_free(io->ctx.req, cc->req_pool);
+ mempool_free(io, cc->io_pool);
+
+- if (likely(!base_io))
+- bio_endio(base_bio, error);
+- else {
+- if (error && !base_io->error)
+- base_io->error = error;
+- crypt_dec_pending(base_io);
+- }
++ bio_endio(base_bio, error);
+ }
+
+ /*
+@@ -1030,10 +1003,7 @@ static void kcryptd_crypt_write_convert(
+ {
+ struct crypt_config *cc = io->cc;
+ struct bio *clone;
+- struct dm_crypt_io *new_io;
+ int crypt_finished;
+- unsigned out_of_pages = 0;
+- unsigned remaining = io->base_bio->bi_size;
+ sector_t sector = io->sector;
+ int r;
+
+@@ -1043,81 +1013,28 @@ static void kcryptd_crypt_write_convert(
+ crypt_inc_pending(io);
+ crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
+
+- /*
+- * The allocated buffers can be smaller than the whole bio,
+- * so repeat the whole process until all the data can be handled.
+- */
+- while (remaining) {
+- clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
+- if (unlikely(!clone)) {
+- io->error = -ENOMEM;
+- break;
+- }
+-
+- io->ctx.bio_out = clone;
+- io->ctx.idx_out = 0;
+-
+- remaining -= clone->bi_size;
+- sector += bio_sectors(clone);
+-
+- crypt_inc_pending(io);
+-
+- r = crypt_convert(cc, &io->ctx);
+- if (r < 0)
+- io->error = -EIO;
+-
+- crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+-
+- /* Encryption was already finished, submit io now */
+- if (crypt_finished) {
+- kcryptd_crypt_write_io_submit(io, 0);
+-
+- /*
+- * If there was an error, do not try next fragments.
+- * For async, error is processed in async handler.
+- */
+- if (unlikely(r < 0))
+- break;
++ clone = crypt_alloc_buffer(io, io->base_bio->bi_size);
++ if (unlikely(!clone)) {
++ io->error = -ENOMEM;
++ goto dec;
++ }
+
+- io->sector = sector;
+- }
++ io->ctx.bio_out = clone;
++ io->ctx.idx_out = 0;
+
+- /*
+- * Out of memory -> run queues
+- * But don't wait if split was due to the io size restriction
+- */
+- if (unlikely(out_of_pages))
+- congestion_wait(BLK_RW_ASYNC, HZ/100);
++ sector += bio_sectors(clone);
+
+- /*
+- * With async crypto it is unsafe to share the crypto context
+- * between fragments, so switch to a new dm_crypt_io structure.
+- */
+- if (unlikely(!crypt_finished && remaining)) {
+- new_io = crypt_io_alloc(io->cc, io->base_bio,
+- sector);
+- crypt_inc_pending(new_io);
+- crypt_convert_init(cc, &new_io->ctx, NULL,
+- io->base_bio, sector);
+- new_io->ctx.idx_in = io->ctx.idx_in;
+- new_io->ctx.offset_in = io->ctx.offset_in;
+-
+- /*
+- * Fragments after the first use the base_io
+- * pending count.
+- */
+- if (!io->base_io)
+- new_io->base_io = io;
+- else {
+- new_io->base_io = io->base_io;
+- crypt_inc_pending(io->base_io);
+- crypt_dec_pending(io);
+- }
++ crypt_inc_pending(io);
++ r = crypt_convert(cc, &io->ctx);
++ if (r)
++ io->error = -EIO;
++ crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
+
+- io = new_io;
+- }
+- }
++ /* Encryption was already finished, submit io now */
++ if (crypt_finished)
++ kcryptd_crypt_write_io_submit(io, 0);
+
++dec:
+ crypt_dec_pending(io);
+ }
+
+@@ -1556,7 +1473,7 @@ static int crypt_ctr(struct dm_target *t
+ goto bad;
+ }
+
+- cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
++ cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
+ if (!cc->page_pool) {
+ ti->error = "Cannot allocate page mempool";
+ goto bad;
diff --git a/dm-crypt-fix-allocation-deadlock.patch b/dm-crypt-fix-allocation-deadlock.patch
new file mode 100644
index 0000000..8f439da
--- /dev/null
+++ b/dm-crypt-fix-allocation-deadlock.patch
@@ -0,0 +1,111 @@
+dm-crypt: avoid deadlock in mempools
+
+This patch fixes a theoretical deadlock introduced in the previous patch.
+
+The function crypt_alloc_buffer may be called concurrently. If we allocate
+from the mempool concurrently, there is a possibility of deadlock.
+For example, if we have mempool of 256 pages, two processes, each wanting 256
+pages, allocate from the mempool concurrently, it may deadlock in a situation
+where both processes have allocated 128 pages and the mempool is exhausted.
+
+In order to avoid this scenario, we allocate the pages under a mutex.
+
+In order to not degrade performance with excessive locking, we try
+non-blocking allocations without a mutex first and if it fails, we fall back
+to a blocking allocation with a mutex.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 36 +++++++++++++++++++++++++++++++++---
+ 1 file changed, 33 insertions(+), 3 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:13.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:53.000000000 +0200
+@@ -118,6 +118,7 @@ struct crypt_config {
+ mempool_t *req_pool;
+ mempool_t *page_pool;
+ struct bio_set *bs;
++ struct mutex bio_alloc_lock;
+
+ struct workqueue_struct *io_queue;
+ struct workqueue_struct *crypt_queue;
+@@ -780,24 +781,46 @@ static void crypt_free_buffer_pages(stru
+ /*
+ * Generate a new unfragmented bio with the given size
+ * This should never violate the device limitations
++ *
++ * This function may be called concurrently. If we allocate from the mempool
++ * concurrently, there is a possibility of deadlock. For example, if we have
++ * mempool of 256 pages, two processes, each wanting 256 pages, allocate from
++ * the mempool concurrently, it may deadlock in a situation where both processes
++ * have allocated 128 pages and the mempool is exhausted.
++ *
++ * In order to avoid this scenario, we allocate the pages under a mutex.
++ *
++ * In order to not degrade performance with excessive locking, we try
++ * non-blocking allocations without a mutex first and if it fails, we fall back
++ * to a blocking allocation with a mutex.
+ */
+ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
+ {
+ struct crypt_config *cc = io->cc;
+ struct bio *clone;
+ unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
++ gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+ unsigned i, len;
+ struct page *page;
+
++retry:
++ if (unlikely(gfp_mask & __GFP_WAIT))
++ mutex_lock(&cc->bio_alloc_lock);
++
+ clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
+ if (!clone)
+- return NULL;
++ goto return_clone;
+
+ clone_init(io, clone);
+
+ for (i = 0; i < nr_iovecs; i++) {
+ page = mempool_alloc(cc->page_pool, gfp_mask);
++ if (!page) {
++ crypt_free_buffer_pages(cc, clone);
++ bio_put(clone);
++ gfp_mask |= __GFP_WAIT;
++ goto retry;
++ }
+
+ len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+
+@@ -805,12 +828,17 @@ static struct bio *crypt_alloc_buffer(st
+ mempool_free(page, cc->page_pool);
+ crypt_free_buffer_pages(cc, clone);
+ bio_put(clone);
+- return NULL;
++ clone = NULL;
++ goto return_clone;
+ }
+
+ size -= len;
+ }
+
++return_clone:
++ if (unlikely(gfp_mask & __GFP_WAIT))
++ mutex_unlock(&cc->bio_alloc_lock);
++
+ return clone;
+ }
+
+@@ -1485,6 +1513,8 @@ static int crypt_ctr(struct dm_target *t
+ goto bad;
+ }
+
++ mutex_init(&cc->bio_alloc_lock);
++
+ ret = -EINVAL;
+ if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
+ ti->error = "Invalid iv_offset sector";
diff --git a/dm-crypt-offload-writes-to-thread.patch b/dm-crypt-offload-writes-to-thread.patch
new file mode 100644
index 0000000..da2e68f
--- /dev/null
+++ b/dm-crypt-offload-writes-to-thread.patch
@@ -0,0 +1,232 @@
+dm-crypt: offload writes to thread
+
+Submitting write bios directly in the encryption thread caused serious
+performance degradation. On multiprocessor machine encryption requests
+finish in a different order than they were submitted in. Consequently, write
+requests would be submitted in a different order and it could cause severe
+performance degradation.
+
+This patch moves submitting write requests to a separate thread so that
+the requests can be sorted before submitting.
+
+Sorting is implemented in the next patch.
+
+Note: it is required that a previous patch "dm-crypt: don't allocate pages
+for a partial request." is applied before applying this patch. Without
+that, this patch could introduce a crash.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 120 ++++++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 97 insertions(+), 23 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:53.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:49:57.000000000 +0200
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/crypto.h>
+ #include <linux/workqueue.h>
++#include <linux/kthread.h>
+ #include <linux/backing-dev.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
+@@ -59,6 +60,8 @@ struct dm_crypt_io {
+ atomic_t io_pending;
+ int error;
+ sector_t sector;
++
++ struct list_head list;
+ };
+
+ struct dm_crypt_request {
+@@ -123,6 +126,10 @@ struct crypt_config {
+ struct workqueue_struct *io_queue;
+ struct workqueue_struct *crypt_queue;
+
++ struct task_struct *write_thread;
++ wait_queue_head_t write_thread_wait;
++ struct list_head write_thread_list;
++
+ char *cipher;
+ char *cipher_string;
+
+@@ -977,37 +984,89 @@ static int kcryptd_io_read(struct dm_cry
+ return 0;
+ }
+
++static void kcryptd_io_read_work(struct work_struct *work)
++{
++ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++
++ crypt_inc_pending(io);
++ if (kcryptd_io_read(io, GFP_NOIO))
++ io->error = -ENOMEM;
++ crypt_dec_pending(io);
++}
++
++static void kcryptd_queue_read(struct dm_crypt_io *io)
++{
++ struct crypt_config *cc = io->cc;
++
++ INIT_WORK(&io->work, kcryptd_io_read_work);
++ queue_work(cc->io_queue, &io->work);
++}
++
+ static void kcryptd_io_write(struct dm_crypt_io *io)
+ {
+ struct bio *clone = io->ctx.bio_out;
++
+ generic_make_request(clone);
+ }
+
+-static void kcryptd_io(struct work_struct *work)
++static int dmcrypt_write(void *data)
+ {
+- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
++ struct crypt_config *cc = data;
++ while (1) {
++ struct list_head local_list;
++ struct blk_plug plug;
+
+- if (bio_data_dir(io->base_bio) == READ) {
+- crypt_inc_pending(io);
+- if (kcryptd_io_read(io, GFP_NOIO))
+- io->error = -ENOMEM;
+- crypt_dec_pending(io);
+- } else
+- kcryptd_io_write(io);
+-}
++ DECLARE_WAITQUEUE(wait, current);
+
+-static void kcryptd_queue_io(struct dm_crypt_io *io)
+-{
+- struct crypt_config *cc = io->cc;
++ spin_lock_irq(&cc->write_thread_wait.lock);
++continue_locked:
+
+- INIT_WORK(&io->work, kcryptd_io);
+- queue_work(cc->io_queue, &io->work);
++ if (!list_empty(&cc->write_thread_list))
++ goto pop_from_list;
++
++ __set_current_state(TASK_INTERRUPTIBLE);
++ __add_wait_queue(&cc->write_thread_wait, &wait);
++
++ spin_unlock_irq(&cc->write_thread_wait.lock);
++
++ if (unlikely(kthread_should_stop())) {
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(&cc->write_thread_wait, &wait);
++ break;
++ }
++
++ schedule();
++
++ set_task_state(current, TASK_RUNNING);
++ spin_lock_irq(&cc->write_thread_wait.lock);
++ __remove_wait_queue(&cc->write_thread_wait, &wait);
++ goto continue_locked;
++
++pop_from_list:
++ local_list = cc->write_thread_list;
++ local_list.next->prev = &local_list;
++ local_list.prev->next = &local_list;
++ INIT_LIST_HEAD(&cc->write_thread_list);
++
++ spin_unlock_irq(&cc->write_thread_wait.lock);
++
++ blk_start_plug(&plug);
++ do {
++ struct dm_crypt_io *io = container_of(local_list.next,
++ struct dm_crypt_io, list);
++ list_del(&io->list);
++ kcryptd_io_write(io);
++ } while (!list_empty(&local_list));
++ blk_finish_plug(&plug);
++ }
++ return 0;
+ }
+
+-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
++static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io)
+ {
+ struct bio *clone = io->ctx.bio_out;
+ struct crypt_config *cc = io->cc;
++ unsigned long flags;
+
+ if (unlikely(io->error < 0)) {
+ crypt_free_buffer_pages(cc, clone);
+@@ -1021,10 +1080,10 @@ static void kcryptd_crypt_write_io_submi
+
+ clone->bi_sector = cc->start + io->sector;
+
+- if (async)
+- kcryptd_queue_io(io);
+- else
+- generic_make_request(clone);
++ spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
++ list_add_tail(&io->list, &cc->write_thread_list);
++ wake_up_locked(&cc->write_thread_wait);
++ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+ }
+
+ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
+@@ -1060,7 +1119,7 @@ static void kcryptd_crypt_write_convert(
+
+ /* Encryption was already finished, submit io now */
+ if (crypt_finished)
+- kcryptd_crypt_write_io_submit(io, 0);
++ kcryptd_crypt_write_io_submit(io);
+
+ dec:
+ crypt_dec_pending(io);
+@@ -1118,7 +1177,7 @@ static void kcryptd_async_done(struct cr
+ if (bio_data_dir(io->base_bio) == READ)
+ kcryptd_crypt_read_done(io);
+ else
+- kcryptd_crypt_write_io_submit(io, 1);
++ kcryptd_crypt_write_io_submit(io);
+ }
+
+ static void kcryptd_crypt(struct work_struct *work)
+@@ -1262,6 +1321,9 @@ static void crypt_dtr(struct dm_target *
+ if (!cc)
+ return;
+
++ if (cc->write_thread)
++ kthread_stop(cc->write_thread);
++
+ if (cc->io_queue)
+ destroy_workqueue(cc->io_queue);
+ if (cc->crypt_queue)
+@@ -1578,6 +1640,18 @@ static int crypt_ctr(struct dm_target *t
+ goto bad;
+ }
+
++ init_waitqueue_head(&cc->write_thread_wait);
++ INIT_LIST_HEAD(&cc->write_thread_list);
++
++ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
++ if (IS_ERR(cc->write_thread)) {
++ ret = PTR_ERR(cc->write_thread);
++ cc->write_thread = NULL;
++ ti->error = "Couldn't spawn write thread";
++ goto bad;
++ }
++ wake_up_process(cc->write_thread);
++
+ ti->num_flush_bios = 1;
+ ti->discard_zeroes_data_unsupported = true;
+
+@@ -1611,7 +1685,7 @@ static int crypt_map(struct dm_target *t
+
+ if (bio_data_dir(io->base_bio) == READ) {
+ if (kcryptd_io_read(io, GFP_NOWAIT))
+- kcryptd_queue_io(io);
++ kcryptd_queue_read(io);
+ } else
+ kcryptd_queue_crypt(io);
+
diff --git a/dm-crypt-remove-percpu.patch b/dm-crypt-remove-percpu.patch
new file mode 100644
index 0000000..2467276
--- /dev/null
+++ b/dm-crypt-remove-percpu.patch
@@ -0,0 +1,185 @@
+dm-crypt: remove per-cpu structure
+
+Remove per-cpu structure and make it per-convert_context instead.
+This allows moving requests between different cpus.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 61 +++++++++-----------------------------------------
+ 1 file changed, 12 insertions(+), 49 deletions(-)
+
+Index: linux-3.8.6-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200
++++ linux-3.8.6-fast/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200
+@@ -18,7 +18,6 @@
+ #include <linux/crypto.h>
+ #include <linux/workqueue.h>
+ #include <linux/backing-dev.h>
+-#include <linux/percpu.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
+ #include <asm/page.h>
+@@ -44,6 +43,7 @@ struct convert_context {
+ unsigned int idx_out;
+ sector_t cc_sector;
+ atomic_t cc_pending;
++ struct ablkcipher_request *req;
+ };
+
+ /*
+@@ -105,15 +105,7 @@ struct iv_lmk_private {
+ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+
+ /*
+- * Duplicated per-CPU state for cipher.
+- */
+-struct crypt_cpu {
+- struct ablkcipher_request *req;
+-};
+-
+-/*
+- * The fields in here must be read only after initialization,
+- * changing state should be in crypt_cpu.
++ * The fields in here must be read only after initialization.
+ */
+ struct crypt_config {
+ struct dm_dev *dev;
+@@ -143,12 +135,6 @@ struct crypt_config {
+ sector_t iv_offset;
+ unsigned int iv_size;
+
+- /*
+- * Duplicated per cpu state. Access through
+- * per_cpu_ptr() only.
+- */
+- struct crypt_cpu __percpu *cpu;
+-
+ /* ESSIV: struct crypto_cipher *essiv_tfm */
+ void *iv_private;
+ struct crypto_ablkcipher **tfms;
+@@ -184,11 +170,6 @@ static void clone_init(struct dm_crypt_i
+ static void kcryptd_queue_crypt(struct dm_crypt_io *io);
+ static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
+
+-static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
+-{
+- return this_cpu_ptr(cc->cpu);
+-}
+-
+ /*
+ * Use this to access cipher attributes that are the same for each CPU.
+ */
+@@ -738,16 +719,15 @@ static void kcryptd_async_done(struct cr
+ static void crypt_alloc_req(struct crypt_config *cc,
+ struct convert_context *ctx)
+ {
+- struct crypt_cpu *this_cc = this_crypt_config(cc);
+ unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
+
+- if (!this_cc->req)
+- this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
++ if (!ctx->req)
++ ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+
+- ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]);
+- ablkcipher_request_set_callback(this_cc->req,
++ ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
++ ablkcipher_request_set_callback(ctx->req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+- kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
++ kcryptd_async_done, dmreq_of_req(cc, ctx->req));
+ }
+
+ /*
+@@ -756,7 +736,6 @@ static void crypt_alloc_req(struct crypt
+ static int crypt_convert(struct crypt_config *cc,
+ struct convert_context *ctx)
+ {
+- struct crypt_cpu *this_cc = this_crypt_config(cc);
+ int r;
+
+ atomic_set(&ctx->cc_pending, 1);
+@@ -768,7 +747,7 @@ static int crypt_convert(struct crypt_co
+
+ atomic_inc(&ctx->cc_pending);
+
+- r = crypt_convert_block(cc, ctx, this_cc->req);
++ r = crypt_convert_block(cc, ctx, ctx->req);
+
+ switch (r) {
+ /* async */
+@@ -777,7 +756,7 @@ static int crypt_convert(struct crypt_co
+ INIT_COMPLETION(ctx->restart);
+ /* fall through*/
+ case -EINPROGRESS:
+- this_cc->req = NULL;
++ ctx->req = NULL;
+ ctx->cc_sector++;
+ continue;
+
+@@ -877,6 +856,7 @@ static struct dm_crypt_io *crypt_io_allo
+ io->sector = sector;
+ io->error = 0;
+ io->base_io = NULL;
++ io->ctx.req = NULL;
+ atomic_set(&io->io_pending, 0);
+
+ return io;
+@@ -902,6 +882,8 @@ static void crypt_dec_pending(struct dm_
+ if (!atomic_dec_and_test(&io->io_pending))
+ return;
+
++ if (io->ctx.req)
++ mempool_free(io->ctx.req, cc->req_pool);
+ mempool_free(io, cc->io_pool);
+
+ if (likely(!base_io))
+@@ -1329,8 +1311,6 @@ static int crypt_wipe_key(struct crypt_c
+ static void crypt_dtr(struct dm_target *ti)
+ {
+ struct crypt_config *cc = ti->private;
+- struct crypt_cpu *cpu_cc;
+- int cpu;
+
+ ti->private = NULL;
+
+@@ -1342,13 +1322,6 @@ static void crypt_dtr(struct dm_target *
+ if (cc->crypt_queue)
+ destroy_workqueue(cc->crypt_queue);
+
+- if (cc->cpu)
+- for_each_possible_cpu(cpu) {
+- cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+- if (cpu_cc->req)
+- mempool_free(cpu_cc->req, cc->req_pool);
+- }
+-
+ crypt_free_tfms(cc);
+
+ if (cc->bs)
+@@ -1367,9 +1340,6 @@ static void crypt_dtr(struct dm_target *
+ if (cc->dev)
+ dm_put_device(ti, cc->dev);
+
+- if (cc->cpu)
+- free_percpu(cc->cpu);
+-
+ kzfree(cc->cipher);
+ kzfree(cc->cipher_string);
+
+@@ -1424,13 +1394,6 @@ static int crypt_ctr_cipher(struct dm_ta
+ if (tmp)
+ DMWARN("Ignoring unexpected additional cipher options");
+
+- cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)),
+- __alignof__(struct crypt_cpu));
+- if (!cc->cpu) {
+- ti->error = "Cannot allocate per cpu state";
+- goto bad_mem;
+- }
+-
+ /*
+ * For compatibility with the original dm-crypt mapping format, if
+ * only the cipher name is supplied, use cbc-plain.
diff --git a/dm-crypt-sort-requests.patch b/dm-crypt-sort-requests.patch
new file mode 100644
index 0000000..90bfbae
--- /dev/null
+++ b/dm-crypt-sort-requests.patch
@@ -0,0 +1,137 @@
+dm-crypt: sort writes
+
+Write requests are sorted in a red-black tree structure and are submitted
+in the sorted order.
+
+In theory the sorting should be performed by the underlying disk scheduler,
+however, in practice the disk scheduler accepts and sorts only 128 requests.
+In order to sort more requests, we need to implement our own sorting.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 50 +++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 35 insertions(+), 15 deletions(-)
+
+Index: linux-3.9.2-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.9.2-fast.orig/drivers/md/dm-crypt.c 2013-05-15 22:49:57.000000000 +0200
++++ linux-3.9.2-fast/drivers/md/dm-crypt.c 2013-05-15 22:50:01.000000000 +0200
+@@ -21,6 +21,7 @@
+ #include <linux/backing-dev.h>
+ #include <linux/atomic.h>
+ #include <linux/scatterlist.h>
++#include <linux/rbtree.h>
+ #include <asm/page.h>
+ #include <asm/unaligned.h>
+ #include <crypto/hash.h>
+@@ -61,7 +62,7 @@ struct dm_crypt_io {
+ int error;
+ sector_t sector;
+
+- struct list_head list;
++ struct rb_node rb_node;
+ };
+
+ struct dm_crypt_request {
+@@ -128,7 +129,7 @@ struct crypt_config {
+
+ struct task_struct *write_thread;
+ wait_queue_head_t write_thread_wait;
+- struct list_head write_thread_list;
++ struct rb_root write_tree;
+
+ char *cipher;
+ char *cipher_string;
+@@ -1013,7 +1014,7 @@ static int dmcrypt_write(void *data)
+ {
+ struct crypt_config *cc = data;
+ while (1) {
+- struct list_head local_list;
++ struct rb_root write_tree;
+ struct blk_plug plug;
+
+ DECLARE_WAITQUEUE(wait, current);
+@@ -1021,7 +1022,7 @@ static int dmcrypt_write(void *data)
+ spin_lock_irq(&cc->write_thread_wait.lock);
+ continue_locked:
+
+- if (!list_empty(&cc->write_thread_list))
++ if (!RB_EMPTY_ROOT(&cc->write_tree))
+ goto pop_from_list;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+@@ -1043,20 +1044,23 @@ continue_locked:
+ goto continue_locked;
+
+ pop_from_list:
+- local_list = cc->write_thread_list;
+- local_list.next->prev = &local_list;
+- local_list.prev->next = &local_list;
+- INIT_LIST_HEAD(&cc->write_thread_list);
+-
++ write_tree = cc->write_tree;
++ cc->write_tree = RB_ROOT;
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+
++ BUG_ON(rb_parent(write_tree.rb_node));
++
++ /*
++ * Note: we cannot walk the tree here with rb_next because
++ * the structures may be freed when kcryptd_io_write is called.
++ */
+ blk_start_plug(&plug);
+ do {
+- struct dm_crypt_io *io = container_of(local_list.next,
+- struct dm_crypt_io, list);
+- list_del(&io->list);
++ struct dm_crypt_io *io = rb_entry(rb_first(&write_tree),
++ struct dm_crypt_io, rb_node);
++ rb_erase(&io->rb_node, &write_tree);
+ kcryptd_io_write(io);
+- } while (!list_empty(&local_list));
++ } while (!RB_EMPTY_ROOT(&write_tree));
+ blk_finish_plug(&plug);
+ }
+ return 0;
+@@ -1067,6 +1071,8 @@ static void kcryptd_crypt_write_io_submi
+ struct bio *clone = io->ctx.bio_out;
+ struct crypt_config *cc = io->cc;
+ unsigned long flags;
++ sector_t sector;
++ struct rb_node **p, *parent;
+
+ if (unlikely(io->error < 0)) {
+ crypt_free_buffer_pages(cc, clone);
+@@ -1081,7 +1087,21 @@ static void kcryptd_crypt_write_io_submi
+ clone->bi_sector = cc->start + io->sector;
+
+ spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+- list_add_tail(&io->list, &cc->write_thread_list);
++ p = &cc->write_tree.rb_node;
++ parent = NULL;
++ sector = io->sector;
++ while (*p) {
++ parent = *p;
++#define io_node rb_entry(parent, struct dm_crypt_io, rb_node)
++ if (sector < io_node->sector)
++ p = &io_node->rb_node.rb_left;
++ else
++ p = &io_node->rb_node.rb_right;
++#undef io_node
++ }
++ rb_link_node(&io->rb_node, parent, p);
++ rb_insert_color(&io->rb_node, &cc->write_tree);
++
+ wake_up_locked(&cc->write_thread_wait);
+ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
+ }
+@@ -1641,7 +1661,7 @@ static int crypt_ctr(struct dm_target *t
+ }
+
+ init_waitqueue_head(&cc->write_thread_wait);
+- INIT_LIST_HEAD(&cc->write_thread_list);
++ cc->write_tree = RB_ROOT;
+
+ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+ if (IS_ERR(cc->write_thread)) {
diff --git a/dm-crypt-unbound-workqueue.patch b/dm-crypt-unbound-workqueue.patch
new file mode 100644
index 0000000..0433569
--- /dev/null
+++ b/dm-crypt-unbound-workqueue.patch
@@ -0,0 +1,27 @@
+dm-crypt: use unbound workqueue for request processing
+
+Use unbound workqueue so that work is automatically balanced between
+available CPUs.
+
+Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
+
+---
+ drivers/md/dm-crypt.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+Index: linux-3.8.6-fast/drivers/md/dm-crypt.c
+===================================================================
+--- linux-3.8.6-fast.orig/drivers/md/dm-crypt.c 2013-04-11 17:29:10.000000000 +0200
++++ linux-3.8.6-fast/drivers/md/dm-crypt.c 2013-04-11 17:29:13.000000000 +0200
+@@ -1623,8 +1623,9 @@ static int crypt_ctr(struct dm_target *t
+ cc->crypt_queue = alloc_workqueue("kcryptd",
+ WQ_NON_REENTRANT|
+ WQ_CPU_INTENSIVE|
+- WQ_MEM_RECLAIM,
+- 1);
++ WQ_MEM_RECLAIM|
++ WQ_UNBOUND,
++ num_online_cpus());
+ if (!cc->crypt_queue) {
+ ti->error = "Couldn't create kcryptd queue";
+ goto bad;
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/kernel.git/commitdiff/101a744858c305f43487d5dd1610dc9d588b6877
More information about the pld-cvs-commit
mailing list