Hi Andrew,
> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of
> Andrew Bresticker
> Sent: 09 March 2015 05:42
> To: James Hartley
> Cc: [email protected]
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
>
> Hi James,
>
> On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <[email protected]>
> wrote:
> > This adds support for the Imagination Technologies hash accelerator
> > which provides hardware acceleration for
> > SHA1 SHA224 SHA256 and MD5 hashes.
> >
> > Signed-off-by: James Hartley <[email protected]>
>
> Some general comments below; I'll leave review of the crypto-specific stuff to
> Herbert.
>
> > diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> > index 3924f93..fb84be7 100644
> > --- a/drivers/crypto/Makefile
> > +++ b/drivers/crypto/Makefile
> > @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> > obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> > obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> > obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
> > +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> > obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> > obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> > obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
> > @@ -25,3 +26,4 @@
> > obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> > obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> > obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> > obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
> > +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>
> Unrelated change - perhaps a bad merge conflict resolution?
Yep, now resolved, thanks.
>
> > diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
> > new file mode 100644
> > index 0000000..94a3a6f
> > --- /dev/null
> > +++ b/drivers/crypto/img-hash.c
> > @@ -0,0 +1,1037 @@
> > +/*
> > + * Copyright (c) 2014 Imagination Technologies
> > + * Authors: Will Thomas, James Hartley
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + * Interface structure taken from omap-sham driver
> > + */
> > +
> > +#include <linux/clk.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/of_device.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/scatterlist.h>
> > +
> > +#include <crypto/internal/hash.h>
> > +#include <crypto/md5.h>
> > +#include <crypto/sha.h>
> > +
> > +#define CR_RESET 0
> > +#define CR_RESET_SET 1
> > +#define CR_RESET_UNSET 0
> > +
> > +#define CR_MESSAGE_LENGTH_H 0x4
> > +#define CR_MESSAGE_LENGTH_L 0x8
> > +
> > +#define CR_CONTROL 0xc
> > +#define CR_CONTROL_BYTE_ORDER_3210 0
> > +#define CR_CONTROL_BYTE_ORDER_0123 1
> > +#define CR_CONTROL_BYTE_ORDER_2310 2
> > +#define CR_CONTROL_BYTE_ORDER_1032 3
> > +#define CR_CONTROL_BYTE_ORDER_SHIFT 8
> > +#define CR_CONTROL_ALGO_MD5 0
> > +#define CR_CONTROL_ALGO_SHA1 1
> > +#define CR_CONTROL_ALGO_SHA224 2
> > +#define CR_CONTROL_ALGO_SHA256 3
> > +
> > +#define CR_INTSTAT 0x10
> > +#define CR_INTENAB 0x14
> > +#define CR_INTCLEAR 0x18
> > +#define CR_INT_RESULTS_AVAILABLE BIT(0)
> > +#define CR_INT_NEW_RESULTS_SET BIT(1)
> > +#define CR_INT_RESULT_READ_ERR BIT(2)
> > +#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
> > +#define CR_INT_STATUS BIT(8)
> > +
> > +#define CR_RESULT_QUEUE 0x1c
> > +#define CR_RSD0 0x40
> > +#define CR_CORE_REV 0x50
> > +#define CR_CORE_DES1 0x60
> > +#define CR_CORE_DES2 0x70
> > +
> > +#define DRIVER_FLAGS_BUSY BIT(0)
> > +#define DRIVER_FLAGS_FINAL BIT(1)
> > +#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
> > +#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
> > +#define DRIVER_FLAGS_INIT BIT(4)
> > +#define DRIVER_FLAGS_CPU BIT(5)
> > +#define DRIVER_FLAGS_DMA_READY BIT(6)
> > +#define DRIVER_FLAGS_ERROR BIT(7)
> > +#define DRIVER_FLAGS_SG BIT(8)
> > +#define DRIVER_FLAGS_SHA1 BIT(18)
> > +#define DRIVER_FLAGS_SHA224 BIT(19)
> > +#define DRIVER_FLAGS_SHA256 BIT(20)
> > +#define DRIVER_FLAGS_MD5 BIT(21)
> > +
> > +#define IMG_HASH_QUEUE_LENGTH 20
> > +#define IMG_HASH_DMA_THRESHOLD 64
> > +
> > +#ifdef __LITTLE_ENDIAN
> > +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_3210)
> > +#else
> > +#define IMG_HASH_BYTE_ORDER (CR_CONTROL_BYTE_ORDER_0123)
> > +#endif
>
> Unnecessary parentheses.
Fixed
>
> > +struct img_hash_dev;
> > +
> > +struct img_hash_request_ctx {
> > + struct img_hash_dev *hdev;
> > + u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
> > + unsigned long flags;
> > + size_t digsize;
> > +
> > + dma_addr_t dma_addr;
> > + size_t dma_ct;
> > +
> > + /* sg root */
> > + struct scatterlist *sgfirst;
> > + /* walk state */
> > + struct scatterlist *sg;
> > + size_t nents;
> > + size_t offset;
> > + unsigned int total;
> > + size_t sent;
> > +
> > + unsigned long op;
> > +
> > + size_t bufcnt;
> > + struct ahash_request fallback_req;
> > + u8 buffer[0] __aligned(sizeof(u32));
> > +};
> > +
> > +struct img_hash_ctx {
> > + struct img_hash_dev *hdev;
> > + unsigned long flags;
> > + struct crypto_ahash *fallback;
> > +};
> > +
> > +struct img_hash_dev {
> > + struct list_head list;
> > + struct device *dev;
> > + struct clk *iclk;
> > + struct clk *fclk;
>
> Maybe make these names a little more obvious so it's clear which clock is
> which.
Agreed - done
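For reference, the fields are now named roughly like this (the names are my
choice, happy to change them):

	struct clk *hash_clk;	/* clock gating the hash engine itself */
	struct clk *sys_clk;	/* clock gating the register interface */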
>
> > + int irq;
>
> This isn't used outside of probe(), so it need not be carried around in this
> struct.
Done
>
> > + void __iomem *io_base;
> > +
> > + phys_addr_t bus_addr;
> > + void __iomem *cpu_addr;
> > +
> > + spinlock_t lock;
> > + int err;
> > + struct tasklet_struct done_task;
> > + struct tasklet_struct dma_task;
> > +
> > + unsigned long flags;
> > + struct crypto_queue queue;
> > + struct ahash_request *req;
> > +
> > + struct dma_chan *dma_lch;
> > +};
> > +
> > +struct img_hash_drv {
> > + struct list_head dev_list;
> > + spinlock_t lock;
> > +};
> > +
> > +static struct img_hash_drv img_hash = {
> > + .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
> > + .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
> > +};
> > +
> > +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
> > +{
> > + return readl_relaxed(hdev->io_base + offset);
> > +}
> > +
> > +static inline void img_hash_write(struct img_hash_dev *hdev,
> > + u32 offset, u32 value)
> > +{
> > + writel_relaxed(value, hdev->io_base + offset);
> > +}
> > +
> > +static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
> > +{
> > + return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
> > +}
> > +
> > +static void img_hash_start(struct img_hash_dev *hdev, bool dma)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
> > +
> > + if (ctx->flags & DRIVER_FLAGS_MD5)
> > + cr |= CR_CONTROL_ALGO_MD5;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA1)
> > + cr |= CR_CONTROL_ALGO_SHA1;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA224)
> > + cr |= CR_CONTROL_ALGO_SHA224;
> > + else if (ctx->flags & DRIVER_FLAGS_SHA256)
> > + cr |= CR_CONTROL_ALGO_SHA256;
> > + dev_dbg(hdev->dev, "Starting hash process\n");
> > + img_hash_write(hdev, CR_CONTROL, cr);
> > +
> > + /*
> > + * The hardware block requires two cycles between writing the control
> > + * register and writing the first word of data in non-DMA mode. To
> > + * ensure the first data write is not grouped in a burst with the
> > + * control register write, a read is issued to 'flush' the bus.
> > + */
> > + if (!dma)
> > + img_hash_read(hdev, CR_CONTROL);
> > +}
> > +
> > +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
> > + size_t length, int final)
> > +{
> > + u32 count, len32;
> > + const u32 *buffer = (const u32 *)buf;
> > +
> > + dev_dbg(hdev->dev, "xmit_cpu: length: %u bytes\n", length);
> > +
> > + if (final)
> > + hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > + len32 = DIV_ROUND_UP(length, sizeof(u32));
> > +
> > + for (count = 0; count < len32; count++)
> > + writel_relaxed(buffer[count], hdev->cpu_addr);
> > +
> > + return -EINPROGRESS;
> > +}
> > +
> > +static void img_hash_dma_callback(void *data)
> > +{
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +
> > + if (ctx->bufcnt) {
> > + img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
> > + ctx->bufcnt = 0;
> > + }
> > + if (ctx->sg)
> > + tasklet_schedule(&hdev->dma_task);
> > +}
> > +
> > +static int img_hash_xmit_dma(struct img_hash_dev *hdev,
> > + struct scatterlist *sg)
> > +{
> > + struct dma_async_tx_descriptor *desc;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +
> > + ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > + if (ctx->dma_ct == 0) {
> > + dev_err(hdev->dev, "Invalid DMA sg\n");
> > + hdev->err = -EINVAL;
> > + return -EINVAL;
> > + }
> > +
> > + desc = dmaengine_prep_slave_sg(hdev->dma_lch,
> > + sg,
>
> Line this up (and the rest of this statement) with the open paren of
> dmaengine_prep_slave_sg().
Done
>
> > + ctx->dma_ct,
> > + DMA_MEM_TO_DEV,
> > + DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> > + if (!desc) {
> > + dev_err(hdev->dev, "Null DMA descriptor\n");
> > + hdev->err = -EINVAL;
> > + dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > + return -EINVAL;
> > + }
> > + desc->callback = img_hash_dma_callback;
> > + desc->callback_param = hdev;
> > + dmaengine_submit(desc);
> > + dma_async_issue_pending(hdev->dma_lch);
> > +
> > + return 0;
> > +}
> > +
> > +static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + int ret = 0;
> > +
> > + ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
> > + ctx->buffer,
> > + hdev->req->nbytes);
> > +
> > + ctx->total = hdev->req->nbytes;
> > + ctx->bufcnt = 0;
> > +
> > + hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
> > +
> > + img_hash_start(hdev, false);
> > +
> > + if (ctx->total)
> > + ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
> > + else
> > + ret = 0;
>
> The else block here (and possibly the entire check) seems unnecessary.
> img_hash_xmit_cpu() won't do anything if the length is 0.
Yes, you're right - I've removed the check and the temporary variable.
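The tail of img_hash_write_via_cpu() now collapses to something like:

	img_hash_start(hdev, false);

	return img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);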
>
> > +
> > + return ret;
> > +}
> > +
> > +static int img_hash_finish(struct ahash_request *req)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +
> > + if (!req->result)
> > + return -EINVAL;
> > +
> > + memcpy(req->result, ctx->digest, ctx->digsize);
> > +
> > + return 0;
> > +}
> > +
> > +static void img_hash_copy_hash(struct ahash_request *req)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + u32 *hash = (u32 *)ctx->digest;
> > + int i;
> > +
> > + for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
> > + hash[i] = img_hash_read_result_queue(ctx->hdev);
> > +}
> > +
> > +static void img_hash_finish_req(struct ahash_request *req, int err)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + struct img_hash_dev *hdev = ctx->hdev;
> > +
> > + if (!err) {
> > + img_hash_copy_hash(req);
> > + if (DRIVER_FLAGS_FINAL & hdev->flags)
> > + err = img_hash_finish(req);
> > + } else {
> > + dev_warn(hdev->dev, "Hash failed with error %d\n", err);
> > + ctx->flags |= DRIVER_FLAGS_ERROR;
> > + }
> > +
> > + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> DRIVER_FLAGS_OUTPUT_READY |
> > + DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY |
> > + DRIVER_FLAGS_FINAL);
> > +
> > + if (req->base.complete)
> > + req->base.complete(&req->base, err);
> > +}
> > +
> > +static int img_hash_write_via_dma(struct img_hash_dev *hdev)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +
> > + img_hash_start(hdev, true);
> > +
> > + dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
> > +
> > + if (!ctx->total)
> > + hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > + hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
> > +
> > + tasklet_schedule(&hdev->dma_task);
> > +
> > + return -EINPROGRESS;
> > +}
> > +
> > +static int img_hash_dma_init(struct img_hash_dev *hdev)
> > +{
> > + struct dma_slave_config dma_conf;
> > + int err = -EINVAL;
> > +
> > + hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
> > + if (!hdev->dma_lch) {
> > + dev_err(hdev->dev, "Couldn't aquire a slave DMA
> > channel.\n");
> > + return -EBUSY;
> > + }
> > + dma_conf.direction = DMA_MEM_TO_DEV;
> > + dma_conf.dst_addr = hdev->bus_addr;
> > + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > + dma_conf.dst_maxburst = 16;
> > + dma_conf.device_fc = false;
> > +
> > + err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
> > +
> > + if (err) {
> > + dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
> > + dma_release_channel(hdev->dma_lch);
> > + return err;
> > + }
> > + return 0;
>
> nit: I would expect there to be a newline before the return rather than before
> the if().
Done
>
> > +}
> > +
> > +static void img_hash_dma_task(unsigned long d)
> > +{
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)d;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > + u8 *addr;
> > + size_t nbytes, bleft, wsend, len, tbc;
> > + struct scatterlist tsg;
> > +
> > + if (!ctx->sg)
> > + return;
> > +
> > + addr = sg_virt(ctx->sg);
> > + nbytes = ctx->sg->length - ctx->offset;
> > +
> > + /* The hash accelerator does not support a data valid mask. This means
> > + * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
> > + * padding bytes in the last word written by that dma would erroneously
> > + * be included in the hash. To avoid this we round down the transfer,
> > + * and add the excess to the start of the next dma. It does not matter
> > + * that the final dma may not be a multiple of 4 bytes as the hashing
> > + * block is programmed to accept the correct number of bytes.
> > + */
>
> nit: Multi-line comment style is:
>
> /*
> * blah blah blah
> */
Fixed
>
> > +
> > + bleft = nbytes % 4;
> > + wsend = (nbytes / 4);
> > +
> > + if (wsend) {
> > + sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
> > + if (img_hash_xmit_dma(hdev, &tsg)) {
> > + dev_err(hdev->dev, "DMA failed, falling back to
> > CPU");
> > + ctx->flags |= DRIVER_FLAGS_CPU;
> > + hdev->err = 0;
> > + img_hash_xmit_cpu(hdev, addr + ctx->offset,
> > + wsend * 4, 0);
> > + ctx->sent += wsend * 4;
> > + wsend = 0;
> > + } else {
> > + ctx->sent += wsend * 4;
> > + }
> > + }
> > +
> > + if (bleft) {
> > + ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > + ctx->buffer, bleft, ctx->sent);
> > + tbc = 0;
> > + ctx->sg = sg_next(ctx->sg);
> > + while (ctx->sg && (ctx->bufcnt < 4)) {
> > + len = ctx->sg->length;
> > + if (likely(len > (4 - ctx->bufcnt)))
> > + len = 4 - ctx->bufcnt;
> > + tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > + ctx->buffer + ctx->bufcnt, len,
> > + ctx->sent + ctx->bufcnt);
> > + ctx->bufcnt += tbc;
> > + if (tbc >= ctx->sg->length) {
> > + ctx->sg = sg_next(ctx->sg);
> > + tbc = 0;
> > + }
> > + }
> > +
> > + ctx->sent += ctx->bufcnt;
> > + ctx->offset = tbc;
> > +
> > + if (!wsend)
> > + img_hash_dma_callback(hdev);
> > + } else {
> > + ctx->offset = 0;
> > + ctx->sg = sg_next(ctx->sg);
> > + }
> > +}
> > +
> > +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
> > +{
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +
> > + if (ctx->flags & DRIVER_FLAGS_SG)
> > + dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct,
> > + DMA_TO_DEVICE);
> > +
> > + return 0;
> > +}
> > +
> > +static int img_hash_process_data(struct img_hash_dev *hdev)
> > +{
> > + struct ahash_request *req = hdev->req;
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + int err = 0;
> > +
> > + ctx->bufcnt = 0;
> > +
> > + if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
> > + dev_dbg(hdev->dev, "process data request(%d bytes) using
> DMA\n",
> > + req->nbytes);
> > + err = img_hash_write_via_dma(hdev);
> > + } else {
> > + dev_dbg(hdev->dev, "process data request(%d bytes) using
> CPU\n",
> > + req->nbytes);
> > + err = img_hash_write_via_cpu(hdev);
> > + }
> > + return err;
> > +}
> > +
> > +static int img_hash_hw_init(struct img_hash_dev *hdev)
> > +{
> > + unsigned long long nbits;
> > + u32 u, l;
> > + int ret;
> > +
> > + ret = clk_prepare_enable(hdev->iclk);
>
> It looks like there's no corresponding clk_disable_unprepare() to this, so the
> prepare/enable counts will just keep increasing. Perhaps it would be better
> to manage enabling/disabling the clocks with runtime PM?
Yes, it probably would be - I'll look at doing that.
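Roughly what I have in mind (untested sketch, callback names are my own):

	static int img_hash_runtime_suspend(struct device *dev)
	{
		struct img_hash_dev *hdev = dev_get_drvdata(dev);

		clk_disable_unprepare(hdev->iclk);

		return 0;
	}

	static int img_hash_runtime_resume(struct device *dev)
	{
		struct img_hash_dev *hdev = dev_get_drvdata(dev);

		return clk_prepare_enable(hdev->iclk);
	}

with pm_runtime_enable() called from probe and each request bracketed by
pm_runtime_get_sync()/pm_runtime_put(), so the enable counts stay balanced.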
>
> > + if (ret)
> > + return ret;
> > +
> > + img_hash_write(hdev, CR_RESET, CR_RESET_SET);
> > + img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
> > + img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
> > +
> > + nbits = (hdev->req->nbytes << 3);
> > + u = nbits >> 32;
> > + l = nbits;
> > + img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
> > + img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
> > +
> > + if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
> > + hdev->flags |= DRIVER_FLAGS_INIT;
> > + hdev->err = 0;
> > + }
> > + dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
> > + return 0;
> > +}
> > +
> > +static int img_hash_init(struct ahash_request *req)
> > +{
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +
> > + return crypto_ahash_init(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_handle_queue(struct img_hash_dev *hdev,
> > + struct ahash_request *req)
> > +{
> > + struct crypto_async_request *async_req, *backlog;
> > + struct img_hash_request_ctx *ctx;
> > + unsigned long flags;
> > + int err = 0, res = 0;
> > +
> > + spin_lock_irqsave(&hdev->lock, flags);
> > +
> > + if (req)
> > + res = ahash_enqueue_request(&hdev->queue, req);
> > +
> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > + spin_unlock_irqrestore(&hdev->lock, flags);
> > + return res;
> > + }
> > +
> > + backlog = crypto_get_backlog(&hdev->queue);
> > + async_req = crypto_dequeue_request(&hdev->queue);
> > + if (async_req)
> > + hdev->flags |= DRIVER_FLAGS_BUSY;
> > +
> > + spin_unlock_irqrestore(&hdev->lock, flags);
> > +
> > + if (!async_req)
> > + return res;
> > +
> > + if (backlog)
> > + backlog->complete(backlog, -EINPROGRESS);
> > +
> > + req = ahash_request_cast(async_req);
> > + hdev->req = req;
> > +
> > + ctx = ahash_request_ctx(req);
> > +
> > + dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
> > + ctx->op, req->nbytes);
> > +
> > + err = img_hash_hw_init(hdev);
> > +
> > + if (!err)
> > + err = img_hash_process_data(hdev);
> > +
> > + if (err != -EINPROGRESS) {
> > + /* done_task will not finish so do it here */
> > + img_hash_finish_req(req, err);
> > + }
> > + return res;
> > +}
> > +
> > +static int img_hash_update(struct ahash_request *req)
> > +{
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.nbytes = req->nbytes;
> > + rctx->fallback_req.src = req->src;
> > +
> > + return crypto_ahash_update(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_final(struct ahash_request *req)
> > +{
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.result = req->result;
> > +
> > + return crypto_ahash_final(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_finup(struct ahash_request *req)
> > +{
> > + struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > + rctx->fallback_req.base.flags = req->base.flags
> > + & CRYPTO_TFM_REQ_MAY_SLEEP;
> > + rctx->fallback_req.nbytes = req->nbytes;
> > + rctx->fallback_req.src = req->src;
> > + rctx->fallback_req.result = req->result;
> > +
> > + return crypto_ahash_finup(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_digest(struct ahash_request *req)
> > +{
> > + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > + struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
> > + struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > + struct img_hash_dev *hdev = NULL;
> > + struct img_hash_dev *tmp;
> > + int err;
> > +
> > + spin_lock_bh(&img_hash.lock);
>
> Why spin_{lock,unlock}_bh() here?
Changed to spin_{lock,unlock}(); I don't see any reason why it should be _bh.
>
> > + if (!tctx->hdev) {
> > + list_for_each_entry(tmp, &img_hash.dev_list, list) {
> > + hdev = tmp;
> > + break;
> > + }
> > + tctx->hdev = hdev;
> > +
> > + } else {
> > + hdev = tctx->hdev;
> > + }
> > +
> > + spin_unlock_bh(&img_hash.lock);
> > + ctx->hdev = hdev;
> > + ctx->flags = 0;
> > + ctx->digsize = crypto_ahash_digestsize(tfm);
> > +
> > + switch (ctx->digsize) {
> > + case SHA1_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA1;
> > + break;
> > + case SHA256_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA256;
> > + break;
> > + case SHA224_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_SHA224;
> > + break;
> > + case MD5_DIGEST_SIZE:
> > + ctx->flags |= DRIVER_FLAGS_MD5;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> > +
> > + ctx->bufcnt = 0;
> > + ctx->offset = 0;
> > + ctx->sent = 0;
> > + ctx->total = req->nbytes;
> > + ctx->sg = req->src;
> > + ctx->sgfirst = req->src;
> > + ctx->nents = sg_nents(ctx->sg);
> > +
> > + err = img_hash_handle_queue(tctx->hdev, req);
> > +
> > + return err;
> > +}
> > +
> > +static int img_hash_cra_init(struct crypto_tfm *tfm)
> > +{
> > + struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> > + const char *alg_name = crypto_tfm_alg_name(tfm);
> > + int err = -ENOMEM;
> > +
> > + ctx->fallback = crypto_alloc_ahash(alg_name, 0,
> > + CRYPTO_ALG_NEED_FALLBACK);
> > + if (IS_ERR(ctx->fallback)) {
> > + pr_err("img_hash: Could not load fallback driver.\n");
> > + err = PTR_ERR(ctx->fallback);
> > + goto err;
> > + }
> > + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > + sizeof(struct img_hash_request_ctx) +
> > + IMG_HASH_DMA_THRESHOLD);
> > +
> > + return 0;
> > +
> > +err:
> > + return err;
> > +}
> > +
> > +static void img_hash_cra_exit(struct crypto_tfm *tfm)
> > +{
> > + struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
> > +
> > + crypto_free_ahash(tctx->fallback);
> > +}
> > +
> > +static irqreturn_t img_irq_handler(int irq, void *dev_id)
> > +{
> > + struct img_hash_dev *hdev = dev_id;
> > + u32 reg;
> > +
> > + reg = img_hash_read(hdev, CR_INTSTAT);
> > + img_hash_write(hdev, CR_INTCLEAR, reg);
> > +
> > + if (reg & CR_INT_NEW_RESULTS_SET) {
> > + dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> > + if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > + hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> > + if (!(DRIVER_FLAGS_CPU & hdev->flags))
> > + hdev->flags |= DRIVER_FLAGS_DMA_READY;
> > + tasklet_schedule(&hdev->done_task);
>
> Since this done_task only gets scheduled from here, why not make this a
> threaded IRQ handler and just do the work here instead of separating it
> between a hard IRQ handler and a tasklet?
What is the advantage of doing that? I.e., is this simple case worth setting
up an extra thread?
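Just so I follow, I assume the conversion would look something like this
(img_hash_irq_thread is a name I've made up):

	err = devm_request_threaded_irq(dev, irq, img_irq_handler,
					img_hash_irq_thread, IRQF_ONESHOT,
					dev_name(dev), hdev);

with the hard handler returning IRQ_WAKE_THREAD and the body of
img_hash_done_task() moving into img_hash_irq_thread().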
>
> > + } else {
> > + dev_warn(hdev->dev,
> > + "HASH interrupt when no active
> > requests.\n");
> > + }
> > + } else if (reg & CR_INT_RESULTS_AVAILABLE) {
> > + dev_warn(hdev->dev,
> > + "IRQ triggered before the hash had completed\n");
> > + } else if (reg & CR_INT_RESULT_READ_ERR) {
> > + dev_warn(hdev->dev,
> > + "Attempt to read from an empty result queue\n");
> > + } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
> > + dev_warn(hdev->dev,
> > + "Data written before the hardware was
> > configured\n");
> > + }
> > + return IRQ_HANDLED;
> > +}
> > +
> > +static struct ahash_alg img_algs[] = {
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = MD5_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "md5",
> > + .cra_driver_name = "img-md5",
> > + .cra_priority = 301,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA1_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha1",
> > + .cra_driver_name = "img-sha1",
> > + .cra_priority = 3000,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA1_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA224_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha224",
> > + .cra_driver_name = "img-sha224",
> > + .cra_priority = 3000,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA224_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + },
> > + {
> > + .init = img_hash_init,
> > + .update = img_hash_update,
> > + .final = img_hash_final,
> > + .finup = img_hash_finup,
> > + .digest = img_hash_digest,
> > + .halg = {
> > + .digestsize = SHA256_DIGEST_SIZE,
> > + .base = {
> > + .cra_name = "sha256",
> > + .cra_driver_name = "img-sha256",
> > + .cra_priority = 301,
> > + .cra_flags =
> > + CRYPTO_ALG_ASYNC |
> > + CRYPTO_ALG_NEED_FALLBACK,
> > + .cra_blocksize = SHA256_BLOCK_SIZE,
> > + .cra_ctxsize = sizeof(struct img_hash_ctx),
> > + .cra_init = img_hash_cra_init,
> > + .cra_exit = img_hash_cra_exit,
> > + .cra_module = THIS_MODULE,
> > + }
> > + }
> > + }
> > +};
> > +
> > +static int img_register_algs(struct img_hash_dev *hdev)
> > +{
> > + int i, err;
> > +
> > + for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
> > + err = crypto_register_ahash(&img_algs[i]);
> > + if (err)
> > + goto err_reg;
> > + }
> > + return 0;
> > +
> > +err_reg:
> > + for (; i--; )
> > + crypto_unregister_ahash(&img_algs[i]);
> > +
> > + return err;
> > +}
> > +
> > +static int img_unregister_algs(struct img_hash_dev *hdev)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < ARRAY_SIZE(img_algs); i++)
> > + crypto_unregister_ahash(&img_algs[i]);
> > + return 0;
> > +}
> > +
> > +static void img_hash_done_task(unsigned long data)
> > +{
> > + struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > + int err = 0;
> > +
> > + if (hdev->err == -EINVAL) {
> > + err = hdev->err;
> > + goto finish;
> > + }
> > +
> > + if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
> > + img_hash_handle_queue(hdev, NULL);
> > + return;
> > + }
> > +
> > + if (DRIVER_FLAGS_CPU & hdev->flags) {
> > + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > + hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
> > + goto finish;
> > + }
> > + } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
> > + if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
> > + hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
> > + img_hash_write_via_dma_stop(hdev);
> > + if (hdev->err) {
> > + err = hdev->err;
> > + goto finish;
> > + }
> > + }
> > + if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > + hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> > + DRIVER_FLAGS_OUTPUT_READY);
> > + goto finish;
> > + }
> > + }
> > + return;
> > +
> > +finish:
> > + img_hash_finish_req(hdev->req, err);
> > +}
> > +
> > +static const struct of_device_id img_hash_match[] = {
> > + { .compatible = "img,hash-accelerator" },
> > + {}
> > +};
> > +MODULE_DEVICE_TABLE(of, img_hash_match);
> > +
> > +static int img_hash_probe(struct platform_device *pdev)
> > +{
> > + struct img_hash_dev *hdev;
> > + struct device *dev = &pdev->dev;
> > + struct resource *hash_res;
> > + int err;
> > +
> > + hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
> > + if (hdev == NULL) {
> > + err = -ENOMEM;
> > + goto sha_dev_err;
>
> Why not just "return -ENOMEM" here? There should be no cleanup
> necessary at this point?
Done
>
> > + }
> > + spin_lock_init(&hdev->lock);
> > +
> > + hdev->dev = dev;
> > +
> > + platform_set_drvdata(pdev, hdev);
> > +
> > + INIT_LIST_HEAD(&hdev->list);
> > +
> > + tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
> > + tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
> > +
> > + crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
> > +
> > + /* Register bank */
> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +
> > + hdev->io_base = devm_ioremap_resource(dev, hash_res);
> > + if (IS_ERR(hdev->io_base)) {
> > + dev_err(dev, "can't ioremap\n");
>
> nit: When printing error messages, it's helpful to print the error code as
> well.
Done
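It now reads along the lines of:

	dev_err(dev, "can't ioremap, err = %ld\n", PTR_ERR(hdev->io_base));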
>
> > + err = PTR_ERR(hdev->io_base);
> > + goto hash_io_err;
> > + }
> > +
> > + /* Write port (DMA or CPU) */
> > + hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > + if (!hash_res) {
> > + dev_err(dev, "no write port resource info\n");
> > + err = -ENODEV;
> > + goto res_err;
> > + }
> > + hdev->bus_addr = hash_res->start;
>
> Maybe set this after devm_ioremap_resource() succeeds and avoid the extra
> error check?
Not quite following you here - which check are you saying can be removed?
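If you mean that devm_ioremap_resource() already rejects a NULL resource, so
the explicit !hash_res check can go, I guess it would end up as:

	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
	if (IS_ERR(hdev->cpu_addr)) {
		...
	}
	hdev->bus_addr = hash_res->start;

but correct me if I've misread you.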
>
> > + hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
> > + if (IS_ERR(hdev->cpu_addr)) {
> > + dev_err(dev, "can't ioremap write port\n");
> > + err = PTR_ERR(hdev->cpu_addr);
> > + goto res_err;
> > + }
> > +
> > + hdev->irq = platform_get_irq(pdev, 0);
> > + if (hdev->irq < 0) {
> > + dev_err(dev, "no IRQ resource info\n");
> > + err = hdev->irq;
> > + goto res_err;
> > + }
> > +
> > + err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
> > + dev_name(dev), hdev);
> > + if (err) {
> > + dev_err(dev, "unable to request %s irq\n",
> > + dev_name(dev));
>
> dev_* already prints dev_name().
Yes, removed it
>
> > + goto res_err;
> > + }
> > + dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
> > +
> > + hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
> > + if (IS_ERR(hdev->iclk)) {
> > + dev_err(dev, "clock initialization failed.\n");
> > + err = PTR_ERR(hdev->iclk);
> > + goto res_err;
> > + }
> > +
> > + hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
>
> I don't see this clock getting enabled anywhere.
Good catch - now added to img_hash_hw_init.
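The start of img_hash_hw_init() now looks roughly like:

	ret = clk_prepare_enable(hdev->iclk);
	if (ret)
		return ret;

	ret = clk_prepare_enable(hdev->fclk);
	if (ret) {
		clk_disable_unprepare(hdev->iclk);
		return ret;
	}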
>
> > + if (IS_ERR(hdev->fclk)) {
> > + dev_err(dev, "clock initialization failed.\n");
> > + err = PTR_ERR(hdev->fclk);
> > + goto res_err;
> > + }
> > +
> > + err = img_hash_dma_init(hdev);
> > + if (err)
> > + goto err_dma;
> > +
> > + dev_dbg(dev, "using %s for DMA transfers\n",
> > + dma_chan_name(hdev->dma_lch));
> > +
> > + spin_lock(&img_hash.lock);
> > + list_add_tail(&hdev->list, &img_hash.dev_list);
> > + spin_unlock(&img_hash.lock);
> > +
> > + err = img_register_algs(hdev);
> > + if (err)
> > + goto err_algs;
> > + dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr,
> > + 0x4);
>
> dev_dbg()? (or maybe not at all)
Removed it
>
> > + dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator
> > + initialized\n");
> > +
> > + return 0;
> > +
> > +err_algs:
> > + spin_lock(&img_hash.lock);
> > + list_del(&hdev->list);
> > + spin_unlock(&img_hash.lock);
> > + dma_release_channel(hdev->dma_lch);
> > +err_dma:
> > +hash_io_err:
> > + devm_clk_put(dev, hdev->iclk);
> > + devm_clk_put(dev, hdev->fclk);
>
> Since the clocks were acquired via the devm_* API, there's no need to
> explicitly put() them.
Done
>
> > +res_err:
> > + tasklet_kill(&hdev->done_task);
> > + tasklet_kill(&hdev->dma_task);
> > +sha_dev_err:
> > + dev_err(dev, "initialization failed.\n");
>
> The driver core will print an error message if a device probe fails (for
> reasons other than -ENODEV, at least), so there's no need to print here.
Removed
>
> > + return err;
> > +}
> > +
> > +static int img_hash_remove(struct platform_device *pdev)
> > +{
> > + struct img_hash_dev *hdev;
> > +
> > + hdev = platform_get_drvdata(pdev);
> > + spin_lock(&img_hash.lock);
> > + list_del(&hdev->list);
> > + spin_unlock(&img_hash.lock);
> > +
> > + img_unregister_algs(hdev);
> > +
> > + tasklet_kill(&hdev->done_task);
> > + tasklet_kill(&hdev->dma_task);
> > +
> > + dma_release_channel(hdev->dma_lch);
> > +
> > + clk_disable_unprepare(hdev->iclk);
> > + clk_disable_unprepare(hdev->fclk);
>
> Unbalanced prepare_enable()/disable_unprepare().
Now enabled in img_hash_hw_init.
>
> > + return 0;
> > +}
> > +
> > +static struct platform_driver img_hash_driver = {
> > + .probe = img_hash_probe,
> > + .remove = img_hash_remove,
> > + .driver = {
> > + .name = "img-hash-accelerator",
> > + .of_match_table = of_match_ptr(img_hash_match),
> > + }
> > +};
> > +module_platform_driver(img_hash_driver);
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator
> > +driver"); MODULE_AUTHOR("Will Thomas."); MODULE_AUTHOR("James
> > +Hartley.");
>
> It's helpful to include email addresses in the MODULE_AUTHOR tags.
I can do that for my email address, but not for Will's.
Thanks for the review, Andrew!
James.