On 16-10-20, 02:39, Sanjay R Mehta wrote:
> From: Sanjay R Mehta <sanju.me...@amd.com>
> 
> Adding support for AMD PTDMA controller. It performs high-bandwidth

Add support or Adds support.. would be apt!

> memory to memory and IO copy operation. Device commands are managed
> via a circular queue of 'descriptors', each of which specifies source
> and destination addresses for copying a single buffer of data.
> 
> Signed-off-by: Sanjay R Mehta <sanju.me...@amd.com>
> ---
>  MAINTAINERS                   |   6 +
>  drivers/dma/Kconfig           |   2 +
>  drivers/dma/Makefile          |   1 +
>  drivers/dma/ptdma/Kconfig     |  11 ++
>  drivers/dma/ptdma/Makefile    |  10 ++
>  drivers/dma/ptdma/ptdma-dev.c | 296 +++++++++++++++++++++++++++++++++++++++
>  drivers/dma/ptdma/ptdma-pci.c | 252 +++++++++++++++++++++++++++++++++
>  drivers/dma/ptdma/ptdma.h     | 316 
> ++++++++++++++++++++++++++++++++++++++++++
>  8 files changed, 894 insertions(+)
>  create mode 100644 drivers/dma/ptdma/Kconfig
>  create mode 100644 drivers/dma/ptdma/Makefile
>  create mode 100644 drivers/dma/ptdma/ptdma-dev.c
>  create mode 100644 drivers/dma/ptdma/ptdma-pci.c
>  create mode 100644 drivers/dma/ptdma/ptdma.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 33b27e6..f6ae0d1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -943,6 +943,12 @@ S:       Supported
>  F:   arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
>  F:   drivers/net/ethernet/amd/xgbe/
>  
> +AMD PTDMA DRIVER
> +M:   Sanjay R Mehta <sanju.me...@amd.com>
> +L:   dmaeng...@vger.kernel.org
> +S:   Maintained
> +F:   drivers/dma/ptdma/
> +
>  ANALOG DEVICES INC AD5686 DRIVER
>  M:   Michael Hennerich <michael.henner...@analog.com>
>  L:   linux...@vger.kernel.org
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index 518a143..a21c983 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -748,6 +748,8 @@ source "drivers/dma/ti/Kconfig"
>  
>  source "drivers/dma/fsl-dpaa2-qdma/Kconfig"
>  
> +source "drivers/dma/ptdma/Kconfig"
> +
>  # clients
>  comment "DMA Clients"
>       depends on DMA_ENGINE
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index e60f813..2785756 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -83,6 +83,7 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
>  obj-$(CONFIG_ZX_DMA) += zx_dma.o
>  obj-$(CONFIG_ST_FDMA) += st_fdma.o
>  obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
> +obj-$(CONFIG_AMD_PTDMA) += ptdma/

Please keep this sorted :(

>  obj-y += mediatek/
>  obj-y += qcom/
> diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig
> new file mode 100644
> index 0000000..f93f9c2
> --- /dev/null
> +++ b/drivers/dma/ptdma/Kconfig
> @@ -0,0 +1,11 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +config AMD_PTDMA
> +     tristate  "AMD PassThru DMA Engine"
> +     depends on X86_64 && PCI
> +     help
> +       Enable support for the AMD PTDMA controller.  This controller
> +       provides DMA capabilities & performs high bandwidth memory to
> +       memory and IO copy operation and performs DMA transfer through
> +       queue based descriptor management. This DMA controller is intended
> +       to use with AMD Non-Transparent Bridge devices and not for general
> +       purpose slave DMA.

s/slave/peripheral

> +static int pt_core_execute_cmd(struct ptdma_desc *desc,
> +                            struct pt_cmd_queue *cmd_q)
> +{
> +     __le32 *mp;
> +     u32 *dp;
> +     u32 tail;
> +     int i;
> +
> +     if (desc->dw0.soc) {
> +             desc->dw0.ioc = 1;
> +             desc->dw0.soc = 0;
> +     }
> +     mutex_lock(&cmd_q->q_mutex);
> +
> +     mp = (__le32 *)&cmd_q->qbase[cmd_q->qidx];

why not make the qidx __le32 instead of int ?

> +     dp = (u32 *)desc;
> +     for (i = 0; i < 8; i++)

why 8..?

> +             mp[i] = cpu_to_le32(dp[i]); /* handle endianness */

Where is this used..? Why not drop this..

> +static irqreturn_t pt_core_irq_handler(int irq, void *data)
> +{
> +     struct pt_device *pt = (struct pt_device *)data;

why do you need cast away from void *

> +int pt_core_init(struct pt_device *pt)
> +{
> +     char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
> +     struct pt_cmd_queue *cmd_q = &pt->cmd_q;
> +     u32 dma_addr_lo, dma_addr_hi;
> +     struct device *dev = pt->dev;
> +     struct dma_pool *dma_pool;
> +     int ret;
> +
> +     /* Allocate a dma pool for the queue */
> +     snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", pt->name);
> +
> +     dma_pool = dma_pool_create(dma_pool_name, dev,
> +                                PT_DMAPOOL_MAX_SIZE,
> +                                PT_DMAPOOL_ALIGN, 0);
> +     if (!dma_pool) {
> +             dev_err(dev, "unable to allocate dma pool\n");
> +             ret = -ENOMEM;
> +             return ret;

return -ENOMEM?

> +     }
> +
> +     /* ptdma core initialisation */
> +     iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET);
> +     iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET);
> +     iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET);
> +     iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET);
> +     iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET);
> +
> +     cmd_q->pt = pt;
> +     cmd_q->dma_pool = dma_pool;
> +     mutex_init(&cmd_q->q_mutex);
> +
> +     /* Page alignment satisfies our needs for N <= 128 */
> +     cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
> +     cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize,
> +                                       &cmd_q->qbase_dma,
> +                                       GFP_KERNEL);
> +     if (!cmd_q->qbase) {
> +             dev_err(dev, "unable to allocate command queue\n");
> +             ret = -ENOMEM;
> +             goto e_dma_alloc;
> +     }
> +
> +     cmd_q->qidx = 0;
> +
> +     /* Preset some register values */
> +     cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR;
> +     pt_init_cmdq_regs(cmd_q);
> +
> +     /* Turn off the queues and disable interrupts until ready */
> +     pt_core_disable_queue_interrupts(pt);
> +
> +     cmd_q->qcontrol = 0; /* Start with nothing */
> +     iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
> +
> +     ioread32(cmd_q->reg_int_status);
> +     ioread32(cmd_q->reg_status);
> +
> +     /* Clear the interrupt status */
> +     iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
> +
> +     /* Request an irq */
> +     ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, pt->name, pt);
> +     if (ret) {
> +             dev_err(dev, "unable to allocate an IRQ\n");

Do log the error while you are it!

> +             goto e_pool;
> +     }
> +
> +     /* Update the device registers with queue information.  */
> +
> +     cmd_q->qcontrol &= ~(CMD_Q_SIZE << CMD_Q_SHIFT);
> +     cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD_Q_SHIFT;

Please use helpers in bitfield.h to do the shifts for you, no need to
define the shift values

> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/delay.h>
> +#include <linux/interrupt.h>
> +#include <linux/kernel.h>
> +#include <linux/kthread.h>
> +#include <linux/module.h>
> +#include <linux/pci_ids.h>
> +#include <linux/pci.h>
> +#include <linux/spinlock.h>
> +#include <linux/sched.h>

why do you need sched.h here?

> +
> +#include "ptdma.h"
> +
> +/* Ever-increasing value to produce unique unit numbers */
> +static atomic_t pt_ordinal;

What is the need of that?

> +static int pt_get_irqs(struct pt_device *pt)
> +{
> +     struct device *dev = pt->dev;
> +     int ret;
> +
> +     ret = pt_get_msix_irqs(pt);
> +     if (!ret)
> +             return 0;
> +
> +     /* Couldn't get MSI-X vectors, try MSI */
> +     dev_dbg(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
> +     ret = pt_get_msi_irq(pt);
> +     if (!ret)
> +             return 0;
> +
> +     /* Couldn't get MSI interrupt */
> +     dev_dbg(dev, "could not enable MSI (%d)\n", ret);

Should this not be an error?

> +/*
> + * descriptor for PTDMA commands
> + * 8 32-bit words:
> + * word 0: function; engine; control bits
> + * word 1: length of source data
> + * word 2: low 32 bits of source pointer
> + * word 3: upper 16 bits of source pointer; source memory type
> + * word 4: low 32 bits of destination pointer
> + * word 5: upper 16 bits of destination pointer; destination memory type
> + * word 6: reserved 32 bits
> + * word 7: reserved 32 bits
> + */
> +
> +union dword0 {
> +     struct {
> +             unsigned int soc:1;
> +             unsigned int ioc:1;
> +             unsigned int rsvd1:1;
> +             unsigned int init:1;
> +             unsigned int eom:1;
> +             unsigned int function:15;
> +             unsigned int engine:4;
> +             unsigned int prot:1;
> +             unsigned int rsvd2:7;
> +     };

Do you really want to use union and bitfields here, this seem like HW
description, best to use FIELD_PREP or FIELD_GET for these

Been there, done that would advise to avoid this approach.

-- 
~Vinod

Reply via email to