[PATCH v3 2/2] md: dm-crypt: Introduce the bulk IV mode for bulk crypto

From: Baolin Wang
Date: 2015-12-16
In the current dm-crypt code it is inefficient to map each segment (always
one sector) of a bio with only a single scatterlist at a time for a hardware
crypto engine. In particular, some encryption modes (such as ecb or xts)
used together with a crypto engine need only one initial IV, or a null IV,
instead of a different IV for each sector. In that case we can map the whole
bio with multiple scatterlists and send all the scatterlists of one bio to
the crypto engine to encrypt or decrypt, which improves the hardware
engine's efficiency.

With this optimization, on my test setup (BeagleBone Black board) using 64KB
I/Os on an eMMC storage device, I saw about a 60% improvement in throughput
for encrypted writes and about a 100% improvement for encrypted reads. Note
that this is not suitable for modes which need a different IV for each
sector.

Signed-off-by: Baolin Wang 
---
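
Note (illustration only): as a rough sketch of the mapping described in the
commit message, building the scatterlist table from a bio could look like the
helper below. The name crypt_sg_table_from_bio and the simplified error
handling are made up for this example and are not taken verbatim from the
patch.

/*
 * Map every segment of a bio into one sg_table entry so the whole bio
 * can be passed to the crypto engine as a single request.  Assumes
 * <linux/bio.h> and <linux/scatterlist.h>, which dm-crypt.c already
 * includes.
 */
static int crypt_sg_table_from_bio(struct bio *bio, struct sg_table *sgt)
{
        unsigned int nents = bio_segments(bio);
        struct scatterlist *sg;
        struct bvec_iter iter;
        struct bio_vec bvec;
        int ret;

        /* One entry per segment; GFP_NOIO because we are on the I/O path. */
        ret = sg_alloc_table(sgt, nents, GFP_NOIO);
        if (ret)
                return ret;

        sg = sgt->sgl;
        bio_for_each_segment(bvec, bio, iter) {
                sg_set_page(sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                sg = sg_next(sg);
        }

        return 0;
}

The real code also has to cope with the DM_MAX_SG_LIST limit defined below
and free the table with sg_free_table() once the request has completed.
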
 drivers/md/dm-crypt.c |  333 -
 1 file changed, 327 insertions(+), 6 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 917d47e..003d2e9 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -32,6 +32,7 @@
 #include 
 
 #define DM_MSG_PREFIX "crypt"
+#define DM_MAX_SG_LIST 1024
 
 /*
  * context holding the current state of a multi-part conversion
@@ -68,6 +69,8 @@ struct dm_crypt_request {
struct convert_context *ctx;
struct scatterlist sg_in;
struct scatterlist sg_out;
+   struct sg_table sgt_in;
+   struct sg_table sgt_out;
sector_t iv_sector;
 };
 
@@ -140,6 +143,7 @@ struct crypt_config {
char *cipher;
char *cipher_string;
 
+   int bulk_crypto;
struct crypt_iv_operations *iv_gen_ops;
union {
struct iv_essiv_private essiv;
@@ -238,6 +242,9 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
  *
  * plumb: unimplemented, see:
  * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
+ *
+ * bulk: the initial vector is the 64-bit little-endian version of the sector
+ *  number, which is used as a single initial IV for the whole bulk of data.
  */
 
 static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
@@ -755,6 +762,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
return r;
 }
 
+static int crypt_iv_bulk_gen(struct crypt_config *cc, u8 *iv,
+struct dm_crypt_request *dmreq)
+{
+   memset(iv, 0, cc->iv_size);
+   *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
+
+   return 0;
+}
+
 static struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
 };
@@ -799,6 +815,10 @@ static struct crypt_iv_operations crypt_iv_tcw_ops = {
.post  = crypt_iv_tcw_post
 };
 
+static struct crypt_iv_operations crypt_iv_bulk_ops = {
+   .generator = crypt_iv_bulk_gen
+};
+
 static void crypt_convert_init(struct crypt_config *cc,
   struct convert_context *ctx,
   struct bio *bio_out, struct bio *bio_in,
@@ -833,6 +853,11 @@ static u8 *iv_of_dmreq(struct crypt_config *cc,
crypto_ablkcipher_alignmask(any_tfm(cc)) + 1);
 }
 
+static int crypt_is_bulk_mode(struct crypt_config *cc)
+{
+   return cc->bulk_crypto;
+}
+
 static int crypt_convert_block(struct crypt_config *cc,
   struct convert_context *ctx,
   struct ablkcipher_request *req)
@@ -881,24 +906,40 @@ static int crypt_convert_block(struct crypt_config *cc,
 
 static void kcryptd_async_done(struct crypto_async_request *async_req,
   int error);
+static void kcryptd_async_all_done(struct crypto_async_request *async_req,
+  int error);
 
 static void crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
 {
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
+   struct dm_crypt_request *dmreq;
 
if (!ctx->req)
ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
 
+   dmreq = dmreq_of_req(cc, ctx->req);
+   dmreq->sgt_in.orig_nents = 0;
+   dmreq->sgt_out.orig_nents = 0;
+
ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
 
/*
 * Use REQ_MAY_BACKLOG so a cipher driver internally backlogs
 * requests if driver request queue is full.
 */
-   ablkcipher_request_set_callback(ctx->req,
-   CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
-   kcryptd_async_done, dmreq_of_req(cc, ctx->req));
+   if (crypt_is_bulk_mode(cc))
+   ablkcipher_request_set_callback(ctx->req,
+   CRYPTO_TFM_REQ_MAY_BACKLOG
+   | CRYPTO_TFM_REQ_MAY_SLEEP,
+   kcryptd_async_all_done,
+
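
For completeness: the new IV generator is selected through the normal
dm-crypt cipher string. Assuming the remainder of the patch registers it
under the ivmode name "bulk", as the comment block above documents, a table
line would look roughly like:

    0 <#sectors> crypt aes-ecb-bulk <key in hex> 0 /dev/mmcblk0p2 0

where the device path, size and key are only placeholders.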
