Every single coherent DMA memory buffer occupies at least one page.
Reduce memory usage by switching from coherent buffers to streaming
DMA for I/O requests (struct skd_fitmsg_context) and S/G-lists
(struct fit_sg_descriptor[]).

Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Johannes Thumshirn <jthumsh...@suse.de>
---
 drivers/block/skd_main.c | 145 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 108 insertions(+), 37 deletions(-)

diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index a20434ca3e18..610c8979dc7e 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -32,6 +32,7 @@
 #include <linux/aer.h>
 #include <linux/wait.h>
 #include <linux/stringify.h>
+#include <linux/slab_def.h>
 #include <scsi/scsi.h>
 #include <scsi/sg.h>
 #include <linux/io.h>
@@ -256,6 +257,9 @@ struct skd_device {
 
        u8 skcomp_cycle;
        u32 skcomp_ix;
+       struct kmem_cache *msgbuf_cache;
+       struct kmem_cache *sglist_cache;
+       struct kmem_cache *databuf_cache;
        struct fit_completion_entry_v1 *skcomp_table;
        struct fit_comp_error_info *skerr_table;
        dma_addr_t cq_dma_address;
@@ -538,6 +542,11 @@ static void skd_process_request(struct request *req, bool last)
                return;
        }
 
+       dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+                                  skreq->n_sg *
+                                  sizeof(struct fit_sg_descriptor),
+                                  DMA_TO_DEVICE);
+
        spin_lock_irqsave(&skdev->lock, flags);
        /* Either a FIT msg is in progress or we have to start one. */
        skmsg = skdev->skmsg;
@@ -1078,6 +1087,11 @@ static void skd_complete_internal(struct skd_device *skdev,
 
        dev_dbg(&skdev->pdev->dev, "complete internal %x\n", scsi->cdb[0]);
 
+       dma_sync_single_for_cpu(&skdev->pdev->dev,
+                               skspcl->db_dma_address,
+                               skspcl->req.sksg_list[0].byte_count,
+                               DMA_BIDIRECTIONAL);
+
        skspcl->req.completion = *skcomp;
        skspcl->req.state = SKD_REQ_STATE_IDLE;
        skspcl->req.id += SKD_ID_INCR;
@@ -1263,6 +1277,9 @@ static void skd_send_fitmsg(struct skd_device *skdev,
                 */
                qcmd |= FIT_QCMD_MSGSIZE_64;
 
+       dma_sync_single_for_device(&skdev->pdev->dev, skmsg->mb_dma_address,
+                                  skmsg->length, DMA_TO_DEVICE);
+
        /* Make sure skd_msg_buf is written before the doorbell is triggered. */
        smp_wmb();
 
@@ -1274,6 +1291,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
 {
        u64 qcmd;
 
+       WARN_ON_ONCE(skspcl->req.n_sg != 1);
+
        if (unlikely(skdev->dbg_level > 1)) {
                u8 *bp = (u8 *)skspcl->msg_buf;
                int i;
@@ -1307,6 +1326,17 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
        qcmd = skspcl->mb_dma_address;
        qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
 
+       dma_sync_single_for_device(&skdev->pdev->dev, skspcl->mb_dma_address,
+                                  SKD_N_SPECIAL_FITMSG_BYTES, DMA_TO_DEVICE);
+       dma_sync_single_for_device(&skdev->pdev->dev,
+                                  skspcl->req.sksg_dma_address,
+                                  1 * sizeof(struct fit_sg_descriptor),
+                                  DMA_TO_DEVICE);
+       dma_sync_single_for_device(&skdev->pdev->dev,
+                                  skspcl->db_dma_address,
+                                  skspcl->req.sksg_list[0].byte_count,
+                                  DMA_BIDIRECTIONAL);
+
        /* Make sure skd_msg_buf is written before the doorbell is triggered. */
        smp_wmb();
 
@@ -2619,6 +2649,35 @@ static void skd_release_irq(struct skd_device *skdev)
  *****************************************************************************
  */
 
+static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
+                          dma_addr_t *dma_handle, gfp_t gfp,
+                          enum dma_data_direction dir)
+{
+       struct device *dev = &skdev->pdev->dev;
+       void *buf;
+
+       buf = kmem_cache_alloc(s, gfp); /* one object from cache s */
+       if (!buf)
+               return NULL;
+       *dma_handle = dma_map_single(dev, buf, s->size, dir);
+       if (dma_mapping_error(dev, *dma_handle)) {
+               kmem_cache_free(s, buf); /* buf is from cache s, not kmalloc */
+               buf = NULL;
+       }
+       return buf; /* NULL here means the DMA mapping failed */
+}
+
+static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
+                        void *vaddr, dma_addr_t dma_handle,
+                        enum dma_data_direction dir)
+{
+       if (!vaddr) /* NULL buffer: nothing to unmap or free */
+               return;
+
+       dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+       kmem_cache_free(s, vaddr); /* unmapped above; return it to cache s */
+}
+
 static int skd_cons_skcomp(struct skd_device *skdev)
 {
        int rc = 0;
@@ -2695,18 +2754,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
                                                  dma_addr_t *ret_dma_addr)
 {
        struct fit_sg_descriptor *sg_list;
-       u32 nbytes;
 
-       nbytes = sizeof(*sg_list) * n_sg;
-
-       sg_list = pci_alloc_consistent(skdev->pdev, nbytes, ret_dma_addr);
+       sg_list = skd_alloc_dma(skdev, skdev->sglist_cache, ret_dma_addr,
+                               GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
 
        if (sg_list != NULL) {
                uint64_t dma_address = *ret_dma_addr;
                u32 i;
 
-               memset(sg_list, 0, nbytes);
-
                for (i = 0; i < n_sg - 1; i++) {
                        uint64_t ndp_off;
                        ndp_off = (i + 1) * sizeof(struct fit_sg_descriptor);
@@ -2720,15 +2775,14 @@ static struct fit_sg_descriptor *skd_cons_sg_list(struct skd_device *skdev,
 }
 
 static void skd_free_sg_list(struct skd_device *skdev,
-                            struct fit_sg_descriptor *sg_list, u32 n_sg,
+                            struct fit_sg_descriptor *sg_list,
                             dma_addr_t dma_addr)
 {
-       u32 nbytes = sizeof(*sg_list) * n_sg;
-
        if (WARN_ON_ONCE(!sg_list))
                return;
 
-       pci_free_consistent(skdev->pdev, nbytes, sg_list, dma_addr);
+       skd_free_dma(skdev, skdev->sglist_cache, sg_list, dma_addr,
+                    DMA_TO_DEVICE);
 }
 
 static int skd_init_request(struct blk_mq_tag_set *set, struct request *rq,
@@ -2752,34 +2806,31 @@ static void skd_exit_request(struct blk_mq_tag_set *set, struct request *rq,
        struct skd_device *skdev = set->driver_data;
        struct skd_request_context *skreq = blk_mq_rq_to_pdu(rq);
 
-       skd_free_sg_list(skdev, skreq->sksg_list,
-                        skdev->sgs_per_request,
-                        skreq->sksg_dma_address);
+       skd_free_sg_list(skdev, skreq->sksg_list, skreq->sksg_dma_address);
 }
 
 static int skd_cons_sksb(struct skd_device *skdev)
 {
        int rc = 0;
        struct skd_special_context *skspcl;
-       u32 nbytes;
 
        skspcl = &skdev->internal_skspcl;
 
        skspcl->req.id = 0 + SKD_ID_INTERNAL;
        skspcl->req.state = SKD_REQ_STATE_IDLE;
 
-       nbytes = SKD_N_INTERNAL_BYTES;
-
-       skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
-                                                &skspcl->db_dma_address);
+       skspcl->data_buf = skd_alloc_dma(skdev, skdev->databuf_cache,
+                                        &skspcl->db_dma_address,
+                                        GFP_DMA | __GFP_ZERO,
+                                        DMA_BIDIRECTIONAL);
        if (skspcl->data_buf == NULL) {
                rc = -ENOMEM;
                goto err_out;
        }
 
-       nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-       skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
-                                               &skspcl->mb_dma_address);
+       skspcl->msg_buf = skd_alloc_dma(skdev, skdev->msgbuf_cache,
+                                       &skspcl->mb_dma_address,
+                                       GFP_DMA | __GFP_ZERO, DMA_TO_DEVICE);
        if (skspcl->msg_buf == NULL) {
                rc = -ENOMEM;
                goto err_out;
@@ -2886,6 +2937,7 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
 {
        struct skd_device *skdev;
        int blk_major = skd_major;
+       size_t size;
        int rc;
 
        skdev = kzalloc(sizeof(*skdev), GFP_KERNEL);
@@ -2914,6 +2966,31 @@ static struct skd_device *skd_construct(struct pci_dev *pdev)
        INIT_WORK(&skdev->start_queue, skd_start_queue);
        INIT_WORK(&skdev->completion_worker, skd_completion_worker);
 
+       size = max(SKD_N_FITMSG_BYTES, SKD_N_SPECIAL_FITMSG_BYTES);
+       skdev->msgbuf_cache = kmem_cache_create("skd-msgbuf", size, 0,
+                                               SLAB_HWCACHE_ALIGN, NULL);
+       if (!skdev->msgbuf_cache)
+               goto err_out;
+       WARN_ONCE(kmem_cache_size(skdev->msgbuf_cache) < size,
+                 "skd-msgbuf: %d < %zd\n",
+                 kmem_cache_size(skdev->msgbuf_cache), size);
+       size = skd_sgs_per_request * sizeof(struct fit_sg_descriptor);
+       skdev->sglist_cache = kmem_cache_create("skd-sglist", size, 0,
+                                               SLAB_HWCACHE_ALIGN, NULL);
+       if (!skdev->sglist_cache)
+               goto err_out;
+       WARN_ONCE(kmem_cache_size(skdev->sglist_cache) < size,
+                 "skd-sglist: %d < %zd\n",
+                 kmem_cache_size(skdev->sglist_cache), size);
+       size = SKD_N_INTERNAL_BYTES;
+       skdev->databuf_cache = kmem_cache_create("skd-databuf", size, 0,
+                                                SLAB_HWCACHE_ALIGN, NULL);
+       if (!skdev->databuf_cache)
+               goto err_out;
+       WARN_ONCE(kmem_cache_size(skdev->databuf_cache) < size,
+                 "skd-databuf: %d < %zd\n",
+                 kmem_cache_size(skdev->databuf_cache), size);
+
        dev_dbg(&skdev->pdev->dev, "skcomp\n");
        rc = skd_cons_skcomp(skdev);
        if (rc < 0)
@@ -2986,31 +3063,21 @@ static void skd_free_skmsg(struct skd_device *skdev)
 
 static void skd_free_sksb(struct skd_device *skdev)
 {
-       struct skd_special_context *skspcl;
-       u32 nbytes;
-
-       skspcl = &skdev->internal_skspcl;
-
-       if (skspcl->data_buf != NULL) {
-               nbytes = SKD_N_INTERNAL_BYTES;
+       struct skd_special_context *skspcl = &skdev->internal_skspcl;
 
-               pci_free_consistent(skdev->pdev, nbytes,
-                                   skspcl->data_buf, skspcl->db_dma_address);
-       }
+       skd_free_dma(skdev, skdev->databuf_cache, skspcl->data_buf,
+                    skspcl->db_dma_address, DMA_BIDIRECTIONAL);
 
        skspcl->data_buf = NULL;
        skspcl->db_dma_address = 0;
 
-       if (skspcl->msg_buf != NULL) {
-               nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
-               pci_free_consistent(skdev->pdev, nbytes,
-                                   skspcl->msg_buf, skspcl->mb_dma_address);
-       }
+       skd_free_dma(skdev, skdev->msgbuf_cache, skspcl->msg_buf,
+                    skspcl->mb_dma_address, DMA_TO_DEVICE);
 
        skspcl->msg_buf = NULL;
        skspcl->mb_dma_address = 0;
 
-       skd_free_sg_list(skdev, skspcl->req.sksg_list, 1,
+       skd_free_sg_list(skdev, skspcl->req.sksg_list,
                         skspcl->req.sksg_dma_address);
 
        skspcl->req.sksg_list = NULL;
@@ -3056,6 +3123,10 @@ static void skd_destruct(struct skd_device *skdev)
        dev_dbg(&skdev->pdev->dev, "skcomp\n");
        skd_free_skcomp(skdev);
 
+       kmem_cache_destroy(skdev->databuf_cache);
+       kmem_cache_destroy(skdev->sglist_cache);
+       kmem_cache_destroy(skdev->msgbuf_cache);
+
        dev_dbg(&skdev->pdev->dev, "skdev\n");
        kfree(skdev);
 }
-- 
2.14.0

Reply via email to