Re: [PATCH v2 04/14] crypto: omap-aes: Simplify DMA usage by using direct SGs

2013-08-20 Thread Lokesh Vutla
Hi Joel,

On Sunday 18 August 2013 08:12 AM, Joel Fernandes wrote:
 In early version of this driver, assumptions were made such as DMA layer
 requires contiguous buffers etc. Due to this, new buffers were allocated,
 mapped and used for DMA. These assumptions are no longer true and DMAEngine
 scatter-gather DMA doesn't have such requirements. We simplify the DMA 
 operations
 by directly using the scatter-gather buffers provided by the crypto layer
 instead of creating our own.
 
 A lot of logic that handled DMA'ing only X number of bytes of the total, or as
 much as fitted into a 3rd party buffer is removed and is no longer required.
 
 Also, good performance improvement of at least ~20% seen with encrypting a
 buffer size of 8K (1800 ops/sec vs 1400 ops/sec).  Improvement will be higher
 for much larger blocks though such benchmarking is left as an exercise for the
 reader.  Also DMA usage is much more simplified and coherent with rest of the
 code.
 
 Signed-off-by: Joel Fernandes jo...@ti.com
 ---
  drivers/crypto/omap-aes.c |  147 
 -
  1 file changed, 25 insertions(+), 122 deletions(-)
 
 diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
 index e369e6e..64dd5c1 100644
 --- a/drivers/crypto/omap-aes.c
 +++ b/drivers/crypto/omap-aes.c
 @@ -480,22 +480,14 @@ static int sg_copy(struct scatterlist **sg, size_t 
 *offset, void *buf,
  }
  
  static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 - struct scatterlist *in_sg, struct scatterlist *out_sg)
 + struct scatterlist *in_sg, struct scatterlist *out_sg,
 + int in_sg_len, int out_sg_len)
  {
   struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
   struct omap_aes_dev *dd = ctx-dd;
   struct dma_async_tx_descriptor *tx_in, *tx_out;
   struct dma_slave_config cfg;
 - dma_addr_t dma_addr_in = sg_dma_address(in_sg);
 - int ret, length = sg_dma_len(in_sg);
 -
 - pr_debug(len: %d\n, length);
 -
 - dd-dma_size = length;
 -
 - if (!(dd-flags  FLAGS_FAST))
 - dma_sync_single_for_device(dd-dev, dma_addr_in, length,
 -DMA_TO_DEVICE);
 + int ret;
With this change FLAGS_FAST is unused, so it can be cleaned up, right?
Or am I missing something?

Thanks and regards,
Lokesh
  
   memset(cfg, 0, sizeof(cfg));
  
 @@ -514,7 +506,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
   return ret;
   }
  
 - tx_in = dmaengine_prep_slave_sg(dd-dma_lch_in, in_sg, 1,
 + tx_in = dmaengine_prep_slave_sg(dd-dma_lch_in, in_sg, in_sg_len,
   DMA_MEM_TO_DEV,
   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
   if (!tx_in) {
 @@ -533,7 +525,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
   return ret;
   }
  
 - tx_out = dmaengine_prep_slave_sg(dd-dma_lch_out, out_sg, 1,
 + tx_out = dmaengine_prep_slave_sg(dd-dma_lch_out, out_sg, out_sg_len,
   DMA_DEV_TO_MEM,
   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
   if (!tx_out) {
 @@ -551,7 +543,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
   dma_async_issue_pending(dd-dma_lch_out);
  
   /* start DMA */
 - dd-pdata-trigger(dd, length);
 + dd-pdata-trigger(dd, dd-total);
  
   return 0;
  }
 @@ -560,93 +552,28 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev 
 *dd)
  {
   struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
   crypto_ablkcipher_reqtfm(dd-req));
 - int err, fast = 0, in, out;
 - size_t count;
 - dma_addr_t addr_in, addr_out;
 - struct scatterlist *in_sg, *out_sg;
 - int len32;
 + int err;
  
   pr_debug(total: %d\n, dd-total);
  
 - if (sg_is_last(dd-in_sg)  sg_is_last(dd-out_sg)) {
 - /* check for alignment */
 - in = IS_ALIGNED((u32)dd-in_sg-offset, sizeof(u32));
 - out = IS_ALIGNED((u32)dd-out_sg-offset, sizeof(u32));
 -
 - fast = in  out;
 + err = dma_map_sg(dd-dev, dd-in_sg, dd-in_sg_len, DMA_TO_DEVICE);
 + if (!err) {
 + dev_err(dd-dev, dma_map_sg() error\n);
 + return -EINVAL;
   }
  
 - if (fast)  {
 - count = min(dd-total, sg_dma_len(dd-in_sg));
 - count = min(count, sg_dma_len(dd-out_sg));
 -
 - if (count != dd-total) {
 - pr_err(request length != buffer length\n);
 - return -EINVAL;
 - }
 -
 - pr_debug(fast\n);
 -
 - err = dma_map_sg(dd-dev, dd-in_sg, 1, DMA_TO_DEVICE);
 - if (!err) {
 - dev_err(dd-dev, dma_map_sg() error\n);
 - return -EINVAL;
 - }
 -
 - err = dma_map_sg(dd-dev, dd-out_sg, 1, DMA_FROM_DEVICE);
 - if (!err) {
 - 

Re: [PATCH v2 04/14] crypto: omap-aes: Simplify DMA usage by using direct SGs

2013-08-20 Thread Joel Fernandes
On 08/20/2013 07:57 AM, Lokesh Vutla wrote:
 Hi Joel,
 
 On Sunday 18 August 2013 08:12 AM, Joel Fernandes wrote:
 In early version of this driver, assumptions were made such as DMA layer
 requires contiguous buffers etc. Due to this, new buffers were allocated,
 mapped and used for DMA. These assumptions are no longer true and DMAEngine
 scatter-gather DMA doesn't have such requirements. We simplify the DMA 
 operations
 by directly using the scatter-gather buffers provided by the crypto layer
 instead of creating our own.

 A lot of logic that handled DMA'ing only X number of bytes of the total, or as
 much as fitted into a 3rd party buffer is removed and is no longer required.

 Also, good performance improvement of at least ~20% seen with encrypting a
 buffer size of 8K (1800 ops/sec vs 1400 ops/sec).  Improvement will be higher
 for much larger blocks though such benchmarking is left as an exercise for 
 the
 reader.  Also DMA usage is much more simplified and coherent with rest of the
 code.

 Signed-off-by: Joel Fernandes jo...@ti.com
 ---
  drivers/crypto/omap-aes.c |  147 
 -
  1 file changed, 25 insertions(+), 122 deletions(-)

 diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
 index e369e6e..64dd5c1 100644
 --- a/drivers/crypto/omap-aes.c
 +++ b/drivers/crypto/omap-aes.c
 @@ -480,22 +480,14 @@ static int sg_copy(struct scatterlist **sg, size_t 
 *offset, void *buf,
  }
  
  static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 -struct scatterlist *in_sg, struct scatterlist *out_sg)
 +struct scatterlist *in_sg, struct scatterlist *out_sg,
 +int in_sg_len, int out_sg_len)
  {
  struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  struct omap_aes_dev *dd = ctx-dd;
  struct dma_async_tx_descriptor *tx_in, *tx_out;
  struct dma_slave_config cfg;
 -dma_addr_t dma_addr_in = sg_dma_address(in_sg);
 -int ret, length = sg_dma_len(in_sg);
 -
 -pr_debug(len: %d\n, length);
 -
 -dd-dma_size = length;
 -
 -if (!(dd-flags  FLAGS_FAST))
 -dma_sync_single_for_device(dd-dev, dma_addr_in, length,
 -   DMA_TO_DEVICE);
 +int ret;
 With this change FLAGS_FAST is unused, so it can be cleaned up, right?
 Or am I missing something?

Yes, FLAGS_FAST would be unused now and can go away. Since it is a very trivial
change, I will make this change in the not-immediate future and submit it.

Thanks,

-Joel


--
To unsubscribe from this list: send the line unsubscribe linux-crypto in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 04/14] crypto: omap-aes: Simplify DMA usage by using direct SGs

2013-08-17 Thread Joel Fernandes
In early version of this driver, assumptions were made such as DMA layer
requires contiguous buffers etc. Due to this, new buffers were allocated,
mapped and used for DMA. These assumptions are no longer true and DMAEngine
scatter-gather DMA doesn't have such requirements. We simplify the DMA operations
by directly using the scatter-gather buffers provided by the crypto layer
instead of creating our own.

A lot of logic that handled DMA'ing only X number of bytes of the total, or as
much as fitted into a 3rd party buffer is removed and is no longer required.

Also, good performance improvement of at least ~20% seen with encrypting a
buffer size of 8K (1800 ops/sec vs 1400 ops/sec).  Improvement will be higher
for much larger blocks though such benchmarking is left as an exercise for the
reader.  Also DMA usage is much more simplified and coherent with rest of the
code.

Signed-off-by: Joel Fernandes jo...@ti.com
---
 drivers/crypto/omap-aes.c |  147 -
 1 file changed, 25 insertions(+), 122 deletions(-)

diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index e369e6e..64dd5c1 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -480,22 +480,14 @@ static int sg_copy(struct scatterlist **sg, size_t 
*offset, void *buf,
 }
 
 static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
-   struct scatterlist *in_sg, struct scatterlist *out_sg)
+   struct scatterlist *in_sg, struct scatterlist *out_sg,
+   int in_sg_len, int out_sg_len)
 {
struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
struct omap_aes_dev *dd = ctx-dd;
struct dma_async_tx_descriptor *tx_in, *tx_out;
struct dma_slave_config cfg;
-   dma_addr_t dma_addr_in = sg_dma_address(in_sg);
-   int ret, length = sg_dma_len(in_sg);
-
-   pr_debug(len: %d\n, length);
-
-   dd-dma_size = length;
-
-   if (!(dd-flags  FLAGS_FAST))
-   dma_sync_single_for_device(dd-dev, dma_addr_in, length,
-  DMA_TO_DEVICE);
+   int ret;
 
memset(cfg, 0, sizeof(cfg));
 
@@ -514,7 +506,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
return ret;
}
 
-   tx_in = dmaengine_prep_slave_sg(dd-dma_lch_in, in_sg, 1,
+   tx_in = dmaengine_prep_slave_sg(dd-dma_lch_in, in_sg, in_sg_len,
DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!tx_in) {
@@ -533,7 +525,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
return ret;
}
 
-   tx_out = dmaengine_prep_slave_sg(dd-dma_lch_out, out_sg, 1,
+   tx_out = dmaengine_prep_slave_sg(dd-dma_lch_out, out_sg, out_sg_len,
DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
if (!tx_out) {
@@ -551,7 +543,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
dma_async_issue_pending(dd-dma_lch_out);
 
/* start DMA */
-   dd-pdata-trigger(dd, length);
+   dd-pdata-trigger(dd, dd-total);
 
return 0;
 }
@@ -560,93 +552,28 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev 
*dd)
 {
struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
crypto_ablkcipher_reqtfm(dd-req));
-   int err, fast = 0, in, out;
-   size_t count;
-   dma_addr_t addr_in, addr_out;
-   struct scatterlist *in_sg, *out_sg;
-   int len32;
+   int err;
 
pr_debug(total: %d\n, dd-total);
 
-   if (sg_is_last(dd-in_sg)  sg_is_last(dd-out_sg)) {
-   /* check for alignment */
-   in = IS_ALIGNED((u32)dd-in_sg-offset, sizeof(u32));
-   out = IS_ALIGNED((u32)dd-out_sg-offset, sizeof(u32));
-
-   fast = in  out;
+   err = dma_map_sg(dd-dev, dd-in_sg, dd-in_sg_len, DMA_TO_DEVICE);
+   if (!err) {
+   dev_err(dd-dev, dma_map_sg() error\n);
+   return -EINVAL;
}
 
-   if (fast)  {
-   count = min(dd-total, sg_dma_len(dd-in_sg));
-   count = min(count, sg_dma_len(dd-out_sg));
-
-   if (count != dd-total) {
-   pr_err(request length != buffer length\n);
-   return -EINVAL;
-   }
-
-   pr_debug(fast\n);
-
-   err = dma_map_sg(dd-dev, dd-in_sg, 1, DMA_TO_DEVICE);
-   if (!err) {
-   dev_err(dd-dev, dma_map_sg() error\n);
-   return -EINVAL;
-   }
-
-   err = dma_map_sg(dd-dev, dd-out_sg, 1, DMA_FROM_DEVICE);
-   if (!err) {
-   dev_err(dd-dev, dma_map_sg() error\n);
-   dma_unmap_sg(dd-dev, dd-in_sg, 1, DMA_TO_DEVICE);
-   return -EINVAL;
-   }
-
-