Introduce a common registry for NTB remote embedded-DMA (eDMA) backends. Vendor-specific backend drivers register themselves here, and the remote-eDMA transport backend selects an implementation based on each backend's match() score.
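For illustration, a vendor backend registers with the registry roughly as
follows. This is a minimal sketch using the API introduced by this patch;
the my_vendor_* names are hypothetical and stand in for a real backend
driver such as the DesignWare one added below:

  #include <linux/module.h>
  #include <linux/ntb.h>

  #include "backend.h"

  static int my_vendor_match(struct ntb_dev *ndev)
  {
  	/* Non-negative score if this device is supported; highest wins. */
  	return 0;
  }

  static const struct ntb_edma_backend_ops my_vendor_ops = {
  	.match = my_vendor_match,
  	/* .alloc, .free, .ep_publish, .rc_connect, ... as needed */
  };

  static struct ntb_edma_backend my_vendor_backend = {
  	.name  = "my-vendor-edma",
  	.ops   = &my_vendor_ops,
  	.owner = THIS_MODULE,
  };

  static int __init my_vendor_init(void)
  {
  	return ntb_edma_backend_register(&my_vendor_backend);
  }
  module_init(my_vendor_init);

  static void __exit my_vendor_exit(void)
  {
  	ntb_edma_backend_unregister(&my_vendor_backend);
  }
  module_exit(my_vendor_exit);

Backends whose match() returns a negative errno are skipped; among the
remaining ones the registry hands out the highest-scoring backend and
takes a module reference on it until ntb_edma_backend_put().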
Add an initial backend for Synopsys DesignWare eDMA. The backend handles exposing the peer-visible eDMA register window and LL rings and provides the plumbing needed by the remote-eDMA transport backend. Signed-off-by: Koichiro Den <[email protected]> --- drivers/ntb/hw/Kconfig | 1 + drivers/ntb/hw/Makefile | 1 + drivers/ntb/hw/edma/Kconfig | 28 + drivers/ntb/hw/edma/Makefile | 5 + drivers/ntb/hw/edma/backend.c | 87 +++ drivers/ntb/hw/edma/backend.h | 102 ++++ drivers/ntb/hw/edma/ntb_dw_edma.c | 977 ++++++++++++++++++++++++++++++ 7 files changed, 1201 insertions(+) create mode 100644 drivers/ntb/hw/edma/Kconfig create mode 100644 drivers/ntb/hw/edma/Makefile create mode 100644 drivers/ntb/hw/edma/backend.c create mode 100644 drivers/ntb/hw/edma/backend.h create mode 100644 drivers/ntb/hw/edma/ntb_dw_edma.c diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig index c325be526b80..4d281f258643 100644 --- a/drivers/ntb/hw/Kconfig +++ b/drivers/ntb/hw/Kconfig @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only source "drivers/ntb/hw/amd/Kconfig" +source "drivers/ntb/hw/edma/Kconfig" source "drivers/ntb/hw/idt/Kconfig" source "drivers/ntb/hw/intel/Kconfig" source "drivers/ntb/hw/epf/Kconfig" diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile index 223ca592b5f9..05fcdd7d56b7 100644 --- a/drivers/ntb/hw/Makefile +++ b/drivers/ntb/hw/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NTB_AMD) += amd/ +obj-$(CONFIG_NTB_EDMA) += edma/ obj-$(CONFIG_NTB_IDT) += idt/ obj-$(CONFIG_NTB_INTEL) += intel/ obj-$(CONFIG_NTB_EPF) += epf/ diff --git a/drivers/ntb/hw/edma/Kconfig b/drivers/ntb/hw/edma/Kconfig new file mode 100644 index 000000000000..e1e82570c8ac --- /dev/null +++ b/drivers/ntb/hw/edma/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config NTB_EDMA + tristate "NTB PCI EP embedded DMA backend registry" + help + Common registry for NTB remote embedded-DMA (eDMA) backends. + Vendor-specific backend drivers register themselves here, and the + remote-eDMA transport backend (NTB_TRANSPORT_EDMA) selects a backend + based on match() score. + + To compile this as a module, choose M here: the module will be called + ntb_edma. + + If unsure, say N. + +config NTB_DW_EDMA + tristate "DesignWare eDMA backend for NTB PCI EP embedded DMA" + depends on DW_EDMA + select NTB_EDMA + select DMA_ENGINE + help + Backend implementation for Synopsys DesignWare PCIe embedded DMA (eDMA) + used with the NTB remote-eDMA transport backend. + + To compile this driver as a module, choose M here: the module will be + called ntb_dw_edma. + + If unsure, say N. diff --git a/drivers/ntb/hw/edma/Makefile b/drivers/ntb/hw/edma/Makefile new file mode 100644 index 000000000000..993a5efd64f8 --- /dev/null +++ b/drivers/ntb/hw/edma/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NTB_EDMA) += ntb_edma.o +ntb_edma-y := backend.o + +obj-$(CONFIG_NTB_DW_EDMA) += ntb_dw_edma.o diff --git a/drivers/ntb/hw/edma/backend.c b/drivers/ntb/hw/edma/backend.c new file mode 100644 index 000000000000..b59100c07908 --- /dev/null +++ b/drivers/ntb/hw/edma/backend.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Generic NTB remote PCI embedded DMA backend registry. + * + * The registry provides a vendor-agnostic rendezvous point for transport + * backends that want to use a peer-exposed embedded DMA engine. 
+ */ + +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/ntb.h> + +#include "backend.h" + +static LIST_HEAD(ntb_edma_backends); +static DEFINE_MUTEX(ntb_edma_backends_lock); + +int ntb_edma_backend_register(struct ntb_edma_backend *be) +{ + struct ntb_edma_backend *tmp; + + if (!be || !be->name || !be->ops) + return -EINVAL; + + scoped_guard(mutex, &ntb_edma_backends_lock) { + list_for_each_entry(tmp, &ntb_edma_backends, node) { + if (!strcmp(tmp->name, be->name)) + return -EEXIST; + } + list_add_tail(&be->node, &ntb_edma_backends); + } + + ntb_bus_reprobe(); + return 0; +} +EXPORT_SYMBOL_GPL(ntb_edma_backend_register); + +void ntb_edma_backend_unregister(struct ntb_edma_backend *be) +{ + if (!be) + return; + + guard(mutex)(&ntb_edma_backends_lock); + list_del_init(&be->node); +} +EXPORT_SYMBOL_GPL(ntb_edma_backend_unregister); + +const struct ntb_edma_backend * +ntb_edma_backend_get(struct ntb_dev *ndev) +{ + const struct ntb_edma_backend *best = NULL, *be; + int best_score = INT_MIN, score; + + guard(mutex)(&ntb_edma_backends_lock); + list_for_each_entry(be, &ntb_edma_backends, node) { + score = be->ops->match ? be->ops->match(ndev) : -ENODEV; + if (score >= 0 && score > best_score) { + best = be; + best_score = score; + } + } + if (best && !try_module_get(best->owner)) + best = NULL; + return best; +} +EXPORT_SYMBOL_GPL(ntb_edma_backend_get); + +void ntb_edma_backend_put(const struct ntb_edma_backend *be) +{ + module_put(be->owner); +} +EXPORT_SYMBOL_GPL(ntb_edma_backend_put); + +static int __init ntb_edma_init(void) +{ + return 0; +} +module_init(ntb_edma_init); + +static void __exit ntb_edma_exit(void) +{ +} +module_exit(ntb_edma_exit); + +MODULE_DESCRIPTION("NTB remote embedded DMA backend registry"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/ntb/hw/edma/backend.h b/drivers/ntb/hw/edma/backend.h new file mode 100644 index 000000000000..c15a78fd4063 --- /dev/null +++ b/drivers/ntb/hw/edma/backend.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ + +#ifndef _NTB_HW_EDMA_BACKEND_H_ +#define _NTB_HW_EDMA_BACKEND_H_ + +#include <linux/init.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/ntb.h> + +#define NTB_EDMA_CH_NUM 4 + +/* + * REMOTE_EDMA_EP: + * Endpoint owns the eDMA engine and pushes descriptors into a shared MW. + * + * REMOTE_EDMA_RC: + * Root Complex controls the endpoint eDMA through the shared MW and + * drives reads/writes on behalf of the host. + */ +typedef enum { + REMOTE_EDMA_UNKNOWN, + REMOTE_EDMA_EP, + REMOTE_EDMA_RC, +} remote_edma_mode_t; + +struct ntb_edma_chans { + struct device *dev; + + struct dma_chan *chan[NTB_EDMA_CH_NUM]; + struct dma_chan *intr_chan; + + unsigned int num_chans; + atomic_t cur_chan; + + struct mutex lock; +}; + +/** + * struct ntb_edma_backend_ops - operations for a remote embedded-DMA backend + * + * A backend provides the hardware-specific plumbing required by the + * ntb_transport remote-eDMA backend, such as exposing peer-mappable resources + * via an NTB MW, setting up DMA channels, and delivering peer notifications. + * + * @match: Optional. Return a non-negative score if this backend + * supports @ndev. Higher score wins. Return a negative + * errno otherwise. + * @alloc: Allocate backend-private per-device state and store + * it in *@priv. Called once during transport backend + * initialization. + * @free: Free backend-private state allocated by @alloc. + * @ep_publish: EP-side control plane. 
Publish peer-accessible resources + * via MW @mw_index for @qp_count queue pairs, and arm + * the notification path. When a peer notification is + * received, invoke @cb(@cb_data, qp_num). + * @ep_unpublish: Undo @ep_publish. + * @rc_connect: RC-side control plane. Connect to peer-published resources + * via MW @mw_index for @qp_count queue pairs. + * @rc_disconnect: Undo @rc_connect. + * @tx_chans_init: Initialize DMA channels used for data transfer into @chans. + * @tx_chans_deinit: Tear down DMA channels initialized by @tx_chans_init. + * @notify_peer: Try to notify the peer about updated shared state for + * @qp_num. Return 0 if the peer has been notified (no + * doorbell fallback needed). Return a non-zero value to + * request a doorbell-based fallback. + */ +struct ntb_edma_backend_ops { + int (*match)(struct ntb_dev *ndev); + int (*alloc)(struct ntb_dev *ndev, void **priv); + void (*free)(struct ntb_dev *ndev, void **priv); + + /* Control plane: EP publishes and RC connects */ + int (*ep_publish)(struct ntb_dev *ndev, void *priv, int mw_index, + unsigned int qp_count, + void (*cb)(void *data, int qp_num), void *cb_data); + void (*ep_unpublish)(struct ntb_dev *ndev, void *priv); + int (*rc_connect)(struct ntb_dev *ndev, void *priv, int mw_index, + unsigned int qp_count); + void (*rc_disconnect)(struct ntb_dev *ndev, void *priv); + + /* Data plane: TX channels */ + int (*tx_chans_init)(struct ntb_dev *ndev, void *priv, + struct ntb_edma_chans *chans, bool remote); + void (*tx_chans_deinit)(struct ntb_edma_chans *chans); + int (*notify_peer)(struct ntb_edma_chans *chans, void *priv, + int qp_num); +}; + +struct ntb_edma_backend { + const char *name; + const struct ntb_edma_backend_ops *ops; + struct module *owner; + struct list_head node; +}; + +int ntb_edma_backend_register(struct ntb_edma_backend *be); +void ntb_edma_backend_unregister(struct ntb_edma_backend *be); +const struct ntb_edma_backend *ntb_edma_backend_get(struct ntb_dev *ndev); +void ntb_edma_backend_put(const struct ntb_edma_backend *be); + +#endif /* _NTB_HW_EDMA_BACKEND_H_ */ diff --git a/drivers/ntb/hw/edma/ntb_dw_edma.c b/drivers/ntb/hw/edma/ntb_dw_edma.c new file mode 100644 index 000000000000..f4c8985889eb --- /dev/null +++ b/drivers/ntb/hw/edma/ntb_dw_edma.c @@ -0,0 +1,977 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +/* + * NTB remote DesignWare eDMA helpers + * + * This file is a helper library used by the NTB transport remote-eDMA backend, + * not a standalone NTB hardware driver. It contains the DesignWare eDMA + * specific plumbing needed to expose/map peer-accessible resources via an NTB + * memory window and to manage DMA channels and peer notifications. + */ + +#include <linux/dma/edma.h> +#include <linux/dmaengine.h> +#include <linux/device.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/ntb.h> +#include <linux/pci.h> +#include <linux/pci-epc.h> +#include <linux/spinlock.h> +#include <linux/xarray.h> + +#include "backend.h" + +/* One extra channel is reserved for notification (RC to EP interrupt kick). 
*/ +#define NTB_DW_EDMA_TOTAL_CH_NUM (NTB_EDMA_CH_NUM + 1) + +#define NTB_DW_EDMA_INFO_MAGIC 0x45444D41 /* "EDMA" */ +#define NTB_DW_EDMA_NOTIFY_MAX_QP 64 +#define NTB_DW_EDMA_NR_IRQS 4 +#define NTB_DW_EDMA_MW_IDX_INVALID (-1) + +/* Default eDMA LLP memory size */ +#define DMA_LLP_MEM_SIZE PAGE_SIZE + +typedef void (*ntb_edma_interrupt_cb_t)(void *data, int qp_num); + +struct ntb_edma_ctx { + bool initialized; + + /* Fields for the notification handling */ + u32 qp_count; + u32 *notify_src_virt; + dma_addr_t notify_src_phys; + struct scatterlist sgl; + + /* Host-to-EP scratch buffer used to convey event information */ + union { + struct ntb_dw_edma_db *db_virt; + struct ntb_dw_edma_db __iomem *db_io; + }; + dma_addr_t db_phys; + + /* Deterministic mapping for dw-edma .irq_vector callback */ + unsigned int peer_irq_count; + int peer_irq_vec[NTB_DW_EDMA_NR_IRQS]; + + /* For interrupts */ + ntb_edma_interrupt_cb_t cb; + void *cb_data; + + /* Below are the records for teardown path */ + + int mw_index; + bool mw_trans_set; + + /* For ntb_dw_edma_info to be unmapped on teardown */ + struct ntb_dw_edma_info *info_virt; + dma_addr_t info_phys; + size_t info_bytes; + + /* Scratchpad backing for the unused tail of the inbound MW */ + void *mw_pad_virt; + dma_addr_t mw_pad_phys; + size_t mw_pad_bytes; + + /* eDMA register window IOMMU mapping (EP side) */ + bool reg_mapped; + struct iommu_domain *iommu_dom; + unsigned long reg_iova; + size_t reg_iova_size; + + /* Read channels delegated to the host side (EP side) */ + struct dma_chan *dchan[NTB_DW_EDMA_TOTAL_CH_NUM]; + + /* RC-side state */ + bool peer_initialized; + bool peer_probed; + struct dw_edma_chip *peer_chip; + void __iomem *peer_virt; + resource_size_t peer_virt_size; +}; + +struct ntb_dw_edma_info { + u32 magic; + u32 reg_size; + u16 ch_cnt; + u64 db_base; + u64 ll_rd_phys[NTB_DW_EDMA_TOTAL_CH_NUM]; +}; + +struct ntb_dw_edma_db { + u32 target; + u32 db[NTB_DW_EDMA_NOTIFY_MAX_QP]; +}; + +struct ntb_edma_filter { + struct device *dma_dev; + u32 direction; +}; + +static DEFINE_XARRAY(ntb_dw_edma_ctx_xa); +static DEFINE_SPINLOCK(ntb_dw_edma_notify_lock); + +static void ntb_dw_edma_ep_unpublish(struct ntb_dev *ndev, void *priv); + +static int ntb_dw_edma_ctx_register(struct device *dev, struct ntb_edma_ctx *ctx) +{ + return xa_insert(&ntb_dw_edma_ctx_xa, (unsigned long)dev, ctx, GFP_KERNEL); +} + +static void ntb_dw_edma_ctx_unregister(struct device *dev) +{ + xa_erase(&ntb_dw_edma_ctx_xa, (unsigned long)dev); +} + +static struct ntb_edma_ctx *ntb_dw_edma_ctx_lookup(struct device *dev) +{ + return xa_load(&ntb_dw_edma_ctx_xa, (unsigned long)dev); +} + +static bool ntb_dw_edma_filter_fn(struct dma_chan *chan, void *arg) +{ + struct ntb_edma_filter *filter = arg; + u32 dir = filter->direction; + struct dma_slave_caps caps; + int ret; + + if (chan->device->dev != filter->dma_dev) + return false; + + ret = dma_get_slave_caps(chan, &caps); + if (ret < 0) + return false; + + return !!(caps.directions & dir); +} + +static void ntb_dw_edma_notify_cb(struct dma_chan *dchan, void *data) +{ + struct ntb_edma_ctx *ctx = data; + ntb_edma_interrupt_cb_t cb; + struct ntb_dw_edma_db *db; + void *cb_data; + u32 qp_count; + u32 i, val; + + guard(spinlock_irqsave)(&ntb_dw_edma_notify_lock); + + cb = ctx->cb; + cb_data = ctx->cb_data; + qp_count = ctx->qp_count; + db = ctx->db_virt; + if (!cb || !db) + return; + + for (i = 0; i < qp_count; i++) { + val = READ_ONCE(db->db[i]); + if (!val) + continue; + + WRITE_ONCE(db->db[i], 0); + cb(cb_data, i); + } +} + +static 
void ntb_dw_edma_undelegate_chans(struct ntb_edma_ctx *ctx) +{ + unsigned int i; + + if (!ctx) + return; + + scoped_guard(spinlock_irqsave, &ntb_dw_edma_notify_lock) { + ctx->cb = NULL; + ctx->cb_data = NULL; + } + + for (i = 0; i < NTB_DW_EDMA_TOTAL_CH_NUM; i++) { + if (!ctx->dchan[i]) + continue; + + if (i == NTB_EDMA_CH_NUM) + dw_edma_chan_register_notify(ctx->dchan[i], NULL, NULL); + + dma_release_channel(ctx->dchan[i]); + ctx->dchan[i] = NULL; + } +} + +static int ntb_dw_edma_delegate_chans(struct device *dev, + struct ntb_edma_ctx *ctx, + struct ntb_dw_edma_info *info, + ntb_edma_interrupt_cb_t cb, void *data) +{ + struct ntb_edma_filter filter; + struct dw_edma_region region; + dma_cap_mask_t dma_mask; + struct dma_chan *chan; + unsigned int i; + int rc; + + dma_cap_zero(dma_mask); + dma_cap_set(DMA_SLAVE, dma_mask); + + filter.dma_dev = dev; + + /* Configure read channels, which will be driven by the host side */ + for (i = 0; i < NTB_DW_EDMA_TOTAL_CH_NUM; i++) { + filter.direction = BIT(DMA_DEV_TO_MEM); + chan = dma_request_channel(dma_mask, ntb_dw_edma_filter_fn, + &filter); + if (!chan) { + rc = -ENODEV; + goto err; + } + ctx->dchan[i] = chan; + + if (i == NTB_EDMA_CH_NUM) { + scoped_guard(spinlock_irqsave, &ntb_dw_edma_notify_lock) { + ctx->cb = cb; + ctx->cb_data = data; + } + rc = dw_edma_chan_register_notify(chan, + ntb_dw_edma_notify_cb, + ctx); + if (rc) + goto err; + } else { + rc = dw_edma_chan_irq_config(chan, DW_EDMA_CH_IRQ_REMOTE); + if (rc) + dev_warn(dev, "irq config failed (i=%u %d)\n", + i, rc); + } + + rc = dw_edma_chan_get_ll_region(chan, ®ion); + if (rc) + goto err; + + info->ll_rd_phys[i] = region.paddr; + } + + return 0; + +err: + ntb_dw_edma_undelegate_chans(ctx); + return rc; +} + +static void ntb_dw_edma_ctx_reset(struct ntb_edma_ctx *ctx) +{ + ctx->initialized = false; + ctx->mw_index = NTB_DW_EDMA_MW_IDX_INVALID; + ctx->mw_trans_set = false; + ctx->reg_mapped = false; + ctx->iommu_dom = NULL; + ctx->reg_iova = 0; + ctx->reg_iova_size = 0; + ctx->db_phys = 0; + ctx->qp_count = 0; + ctx->info_virt = NULL; + ctx->info_phys = 0; + ctx->info_bytes = 0; + ctx->mw_pad_virt = NULL; + ctx->mw_pad_phys = 0; + ctx->mw_pad_bytes = 0; + ctx->db_virt = NULL; + memset(ctx->dchan, 0, sizeof(ctx->dchan)); +} + +static int ntb_dw_edma_match(struct ntb_dev *ndev) +{ + struct pci_epc *epc; + phys_addr_t reg_phys; + resource_size_t reg_size; + + /* EP can verify the local DesignWare eDMA presence via epc hook. */ + epc = ntb_get_private_data(ndev); + if (epc) { + if (dw_edma_get_reg_window(epc, ®_phys, ®_size)) + return -ENODEV; + return 100; + } + + /* Host cannot validate peer eDMA until link/peer mapping is done. 
*/ + return 50; +} + +static int ntb_dw_edma_alloc(struct ntb_dev *ndev, void **priv) +{ + struct ntb_edma_ctx *ctx; + + ctx = devm_kzalloc(&ndev->dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + *priv = ctx; + return 0; +} + +static void ntb_dw_edma_free(struct ntb_dev *ndev, void **priv) +{ + devm_kfree(&ndev->dev, *priv); + *priv = NULL; +} + +static int ntb_dw_edma_ep_publish(struct ntb_dev *ndev, void *priv, + int mw_index, unsigned int qp_count, + ntb_edma_interrupt_cb_t cb, void *data) +{ + struct ntb_edma_ctx *ctx = priv; + struct ntb_dw_edma_info *info; + struct ntb_dw_edma_db *db; + struct iommu_domain *dom; + struct pci_epc *epc; + struct device *dev; + unsigned int num_subrange = NTB_DW_EDMA_TOTAL_CH_NUM + 3; + resource_size_t reg_size, reg_size_mw; + const size_t info_bytes = PAGE_SIZE; + dma_addr_t db_phys, info_phys; + phys_addr_t edma_reg_phys; + resource_size_t size_max; + size_t ll_bytes, size; + unsigned int cur = 0; + u64 need; + int rc; + u32 i; + + if (ctx->initialized) + return 0; + + /* Clean up stale state from a previous failed attempt. */ + ntb_dw_edma_ep_unpublish(ndev, ctx); + + epc = (struct pci_epc *)ntb_get_private_data(ndev); + if (!epc) + return -ENODEV; + dev = epc->dev.parent; + + ntb_dw_edma_ctx_reset(ctx); + + ctx->mw_index = mw_index; + ctx->qp_count = qp_count; + + info = dma_alloc_coherent(dev, info_bytes, &info_phys, GFP_KERNEL); + if (!info) + return -ENOMEM; + memset(info, 0, info_bytes); + + ctx->info_virt = info; + ctx->info_phys = info_phys; + ctx->info_bytes = info_bytes; + + /* Get eDMA reg base and size, IOMMU map it if necessary */ + rc = dw_edma_get_reg_window(epc, &edma_reg_phys, ®_size); + if (rc) { + dev_err(&ndev->pdev->dev, + "failed to get eDMA register window: %d\n", rc); + goto err; + } + dom = iommu_get_domain_for_dev(dev); + if (dom) { + phys_addr_t phys; + unsigned long iova; + + phys = edma_reg_phys & PAGE_MASK; + size = PAGE_ALIGN(reg_size + edma_reg_phys - phys); + iova = phys; + + rc = iommu_map(dom, iova, phys, size, + IOMMU_READ | IOMMU_WRITE | IOMMU_MMIO, + GFP_KERNEL); + if (rc) { + dev_err(&ndev->dev, + "failed to direct map eDMA reg: %d\n", rc); + goto err; + } + + ctx->reg_mapped = true; + ctx->iommu_dom = dom; + ctx->reg_iova = iova; + ctx->reg_iova_size = size; + } + + /* Read channels are driven by the peer (host side) */ + rc = ntb_dw_edma_delegate_chans(dev, ctx, info, cb, data); + if (rc) { + dev_err(&ndev->pdev->dev, + "failed to prepare channels to delegate: %d\n", rc); + goto err; + } + + /* Scratch buffer for notification */ + db = dma_alloc_coherent(dev, sizeof(*db), &db_phys, GFP_KERNEL); + if (!db) { + rc = -ENOMEM; + goto err; + } + memset(db, 0, sizeof(*db)); + + ctx->db_virt = db; + ctx->db_phys = db_phys; + + /* Prep works for IB iATU mappings */ + ll_bytes = NTB_DW_EDMA_TOTAL_CH_NUM * DMA_LLP_MEM_SIZE; + reg_size_mw = roundup_pow_of_two(reg_size); + need = info_bytes + PAGE_SIZE + reg_size_mw + ll_bytes; + + rc = ntb_mw_get_align(ndev, 0, mw_index, NULL, NULL, &size_max); + if (rc) + goto err; + + if (size_max < need) { + rc = -ENOSPC; + goto err; + } + + if (need < size_max) + num_subrange++; + + struct ntb_mw_subrange *r __free(kfree) = + kcalloc(num_subrange, sizeof(*r), GFP_KERNEL); + if (!r) { + rc = -ENOMEM; + goto err; + } + + ctx->mw_trans_set = true; + + /* iATU map ntb_dw_edma_info */ + r[cur].addr = info_phys; + r[cur++].size = info_bytes; + + /* iATU map ntb_dw_edma_db */ + r[cur].addr = db_phys; + r[cur++].size = PAGE_SIZE; + + /* iATU map eDMA reg */ + r[cur].addr = 
edma_reg_phys; + r[cur++].size = reg_size_mw; + + /* iATU map LL location */ + for (i = 0; i < NTB_DW_EDMA_TOTAL_CH_NUM; i++) { + r[cur].addr = info->ll_rd_phys[i]; + r[cur++].size = DMA_LLP_MEM_SIZE; + } + + /* Padding if needed */ + if (size_max - need > 0) { + resource_size_t pad_bytes = size_max - need; + dma_addr_t pad_phys; + void *pad; + + pad = dma_alloc_coherent(dev, pad_bytes, &pad_phys, GFP_KERNEL); + if (!pad) { + rc = -ENOMEM; + goto err; + } + memset(pad, 0, pad_bytes); + + ctx->mw_pad_virt = pad; + ctx->mw_pad_phys = pad_phys; + ctx->mw_pad_bytes = pad_bytes; + + r[cur].addr = pad_phys; + r[cur++].size = pad_bytes; + } + + rc = ntb_mw_set_trans_ranges(ndev, 0, mw_index, num_subrange, r); + if (rc) + goto err; + + /* Fill in info */ + info->magic = NTB_DW_EDMA_INFO_MAGIC; + info->reg_size = reg_size_mw; + info->ch_cnt = NTB_DW_EDMA_TOTAL_CH_NUM; + info->db_base = db_phys; + + ctx->initialized = true; + return 0; + +err: + ntb_dw_edma_ep_unpublish(ndev, ctx); + return rc; +} + +static void ntb_dw_edma_peer_irq_reset(struct ntb_edma_ctx *ctx) +{ + ctx->peer_irq_count = 0; + memset(ctx->peer_irq_vec, 0xff, sizeof(ctx->peer_irq_vec)); +} + +static int ntb_dw_edma_reserve_peer_irq_vectors(struct pci_dev *pdev, + struct ntb_edma_ctx *ctx, + unsigned int nreq) +{ + int i, found = 0; + int irq; + + if (nreq > NTB_DW_EDMA_NR_IRQS) + return -EINVAL; + + ntb_dw_edma_peer_irq_reset(ctx); + + /* NTB driver should have reserved sufficient number of vectors */ + for (i = 0; found < nreq; i++) { + irq = pci_irq_vector(pdev, i); + if (irq < 0) + break; + if (!irq_has_action(irq)) + ctx->peer_irq_vec[found++] = i; + } + if (found < nreq) + return -ENOSPC; + + ctx->peer_irq_count = found; + return 0; +} + +static int ntb_dw_edma_irq_vector(struct device *dev, unsigned int nr) +{ + struct ntb_edma_ctx *ctx = ntb_dw_edma_ctx_lookup(dev); + struct pci_dev *pdev = to_pci_dev(dev); + int vec; + + if (!ctx) + return -EINVAL; + + if (nr >= ctx->peer_irq_count) + return -EINVAL; + + vec = ctx->peer_irq_vec[nr]; + if (vec < 0) + return -EINVAL; + + return pci_irq_vector(pdev, vec); +} + +static const struct dw_edma_plat_ops ntb_dw_edma_ops = { + .irq_vector = ntb_dw_edma_irq_vector, +}; + +static void ntb_dw_edma_rc_disconnect(struct ntb_dev *ndev, void *priv) +{ + struct ntb_edma_ctx *ctx = priv; + void __iomem *peer_virt = ctx->peer_virt; + struct dw_edma_chip *chip = ctx->peer_chip; + u32 *notify_src = ctx->notify_src_virt; + dma_addr_t notify_src_phys = ctx->notify_src_phys; + + /* Stop using peer MMIO early. 
*/ + ctx->db_io = NULL; + ctx->db_phys = 0; + ctx->qp_count = 0; + + if (ctx->peer_probed && chip) + dw_edma_remove(chip); + + ntb_dw_edma_ctx_unregister(&ndev->pdev->dev); + + ntb_dw_edma_peer_irq_reset(ctx); + + ctx->peer_initialized = false; + ctx->peer_probed = false; + ctx->peer_chip = NULL; + + if (notify_src) + dma_free_coherent(&ndev->pdev->dev, sizeof(*notify_src), + notify_src, notify_src_phys); + + ctx->notify_src_virt = NULL; + ctx->notify_src_phys = 0; + memset(&ctx->sgl, 0, sizeof(ctx->sgl)); + + if (peer_virt) + iounmap(peer_virt); + + ctx->peer_virt = NULL; + ctx->peer_virt_size = 0; +} + +static int ntb_dw_edma_rc_connect(struct ntb_dev *ndev, void *priv, int mw_index, + unsigned int qp_count) +{ + struct ntb_edma_ctx *ctx = priv; + struct ntb_dw_edma_info __iomem *info; + struct dw_edma_chip *chip; + void __iomem *edma_virt; + resource_size_t mw_size; + phys_addr_t edma_phys; + unsigned int ch_cnt; + unsigned int i; + int ret; + u64 off; + + if (ctx->peer_initialized) + return 0; + + /* Clean up stale state from a previous failed attempt. */ + ntb_dw_edma_rc_disconnect(ndev, priv); + + ret = ntb_peer_mw_get_addr(ndev, mw_index, &edma_phys, &mw_size); + if (ret) + return ret; + + edma_virt = ioremap(edma_phys, mw_size); + if (!edma_virt) + return -ENOMEM; + + ctx->peer_virt = edma_virt; + ctx->peer_virt_size = mw_size; + + info = edma_virt; + if (readl(&info->magic) != NTB_DW_EDMA_INFO_MAGIC) { + ret = -EINVAL; + goto err; + } + + ch_cnt = readw(&info->ch_cnt); + if (ch_cnt != NTB_DW_EDMA_TOTAL_CH_NUM) { + ret = -EINVAL; + goto err; + } + + chip = devm_kzalloc(&ndev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) { + ret = -ENOMEM; + goto err; + } + + ret = ntb_dw_edma_ctx_register(&ndev->pdev->dev, ctx); + if (ret) + return ret; + + off = 2 * PAGE_SIZE; + chip->dev = &ndev->pdev->dev; + chip->nr_irqs = NTB_DW_EDMA_NR_IRQS; + chip->ops = &ntb_dw_edma_ops; + chip->flags = 0; + chip->reg_base = edma_virt + off; + chip->mf = EDMA_MF_EDMA_UNROLL; + chip->ll_wr_cnt = 0; + chip->ll_rd_cnt = ch_cnt; + + ctx->db_io = (void __iomem *)edma_virt + PAGE_SIZE; + ctx->qp_count = qp_count; + ctx->db_phys = readq(&info->db_base); + + ctx->notify_src_virt = dma_alloc_coherent(&ndev->pdev->dev, + sizeof(*ctx->notify_src_virt), + &ctx->notify_src_phys, + GFP_KERNEL); + if (!ctx->notify_src_virt) { + ret = -ENOMEM; + goto err; + } + + off += readl(&info->reg_size); + + for (i = 0; i < ch_cnt; i++) { + chip->ll_region_rd[i].vaddr.io = edma_virt + off; + chip->ll_region_rd[i].paddr = readq(&info->ll_rd_phys[i]); + chip->ll_region_rd[i].sz = DMA_LLP_MEM_SIZE; + off += DMA_LLP_MEM_SIZE; + } + + if (!pci_dev_msi_enabled(ndev->pdev)) { + ret = -ENXIO; + goto err; + } + ret = ntb_dw_edma_reserve_peer_irq_vectors(ndev->pdev, ctx, chip->nr_irqs); + if (ret) { + dev_err(&ndev->dev, "no free MSI vectors for remote eDMA: %d\n", + ret); + goto err; + } + + ret = dw_edma_probe(chip); + if (ret) { + dev_err(&ndev->dev, "dw_edma_probe failed: %d\n", ret); + ntb_dw_edma_ctx_unregister(&ndev->pdev->dev); + goto err; + } + + ctx->peer_chip = chip; + ctx->peer_probed = true; + ctx->peer_initialized = true; + return 0; + +err: + ntb_dw_edma_rc_disconnect(ndev, ctx); + return ret; +} + +static void ntb_dw_edma_ep_unpublish(struct ntb_dev *ndev, void *priv) +{ + struct ntb_edma_ctx *ctx = priv; + struct ntb_dw_edma_info *info; + struct ntb_dw_edma_db *db; + struct device *dev = NULL; + struct pci_epc *epc; + dma_addr_t db_phys, info_phys, mw_pad_phys; + size_t info_bytes, mw_pad_bytes; + void *mw_pad; + + epc = 
(struct pci_epc *)ntb_get_private_data(ndev); + WARN_ON(!epc); + if (epc) + dev = epc->dev.parent; + + scoped_guard(spinlock_irqsave, &ntb_dw_edma_notify_lock) { + db = ctx->db_virt; + db_phys = ctx->db_phys; + + /* Make callbacks no-op first. */ + ctx->cb = NULL; + ctx->cb_data = NULL; + ctx->db_virt = NULL; + ctx->qp_count = 0; + } + + info = ctx->info_virt; + info_phys = ctx->info_phys; + info_bytes = ctx->info_bytes; + + mw_pad = ctx->mw_pad_virt; + mw_pad_phys = ctx->mw_pad_phys; + mw_pad_bytes = ctx->mw_pad_bytes; + ctx->mw_pad_virt = NULL; + ctx->mw_pad_phys = 0; + ctx->mw_pad_bytes = 0; + + /* Disconnect the MW before freeing its backing memory */ + if (ctx->mw_trans_set && ctx->mw_index != NTB_DW_EDMA_MW_IDX_INVALID) + ntb_mw_clear_trans(ndev, 0, ctx->mw_index); + + ntb_dw_edma_undelegate_chans(ctx); + + if (ctx->reg_mapped) + iommu_unmap(ctx->iommu_dom, ctx->reg_iova, ctx->reg_iova_size); + + if (db && dev) + dma_free_coherent(dev, sizeof(*db), db, db_phys); + + if (info && dev && info_bytes) + dma_free_coherent(dev, info_bytes, info, info_phys); + + if (mw_pad && dev && mw_pad_bytes) + dma_free_coherent(dev, mw_pad_bytes, mw_pad, mw_pad_phys); + + ntb_dw_edma_ctx_reset(ctx); +} + +static void ntb_dw_edma_tx_chans_deinit(struct ntb_edma_chans *edma) +{ + unsigned int i; + + if (!edma) + return; + + for (i = 0; i < NTB_EDMA_CH_NUM; i++) { + if (!edma->chan[i]) + continue; + dmaengine_terminate_sync(edma->chan[i]); + dma_release_channel(edma->chan[i]); + edma->chan[i] = NULL; + } + edma->num_chans = 0; + + if (edma->intr_chan) { + dmaengine_terminate_sync(edma->intr_chan); + dma_release_channel(edma->intr_chan); + edma->intr_chan = NULL; + } + + atomic_set(&edma->cur_chan, 0); +} + +static int ntb_dw_edma_setup_intr_chan(struct device *dev, + struct ntb_edma_chans *edma, void *priv) +{ + struct ntb_edma_ctx *ctx = priv; + struct ntb_edma_filter filter; + dma_cap_mask_t dma_mask; + struct dma_slave_config cfg; + struct scatterlist *sgl = &ctx->sgl; + int rc; + + if (edma->intr_chan) + return 0; + + if (!ctx->notify_src_virt || !ctx->db_phys) + return -EINVAL; + + dma_cap_zero(dma_mask); + dma_cap_set(DMA_SLAVE, dma_mask); + + filter.dma_dev = dev; + filter.direction = BIT(DMA_MEM_TO_DEV); + + edma->intr_chan = dma_request_channel(dma_mask, ntb_dw_edma_filter_fn, + &filter); + if (!edma->intr_chan) { + dev_warn(dev, + "Remote eDMA notify channel could not be allocated\n"); + return -ENODEV; + } + + rc = dw_edma_chan_irq_config(edma->intr_chan, DW_EDMA_CH_IRQ_LOCAL); + if (rc) + goto err_release; + + /* Ensure store is visible before kicking DMA transfer */ + wmb(); + + sg_init_table(sgl, 1); + sg_dma_address(sgl) = ctx->notify_src_phys; + sg_dma_len(sgl) = sizeof(u32); + + memset(&cfg, 0, sizeof(cfg)); + cfg.dst_addr = ctx->db_phys; /* The first 32bit is 'target' */ + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.direction = DMA_MEM_TO_DEV; + + rc = dmaengine_slave_config(edma->intr_chan, &cfg); + if (rc) + goto err_release; + + return 0; + +err_release: + dma_release_channel(edma->intr_chan); + edma->intr_chan = NULL; + return rc; +} + +static int ntb_dw_edma_tx_chans_init(struct ntb_dev *ndev, void *priv, + struct ntb_edma_chans *edma, bool remote) +{ + struct device *dev = ntb_get_dma_dev(ndev); + struct ntb_edma_filter filter; + dma_cap_mask_t dma_mask; + unsigned int i; + int rc; + + dma_cap_zero(dma_mask); + dma_cap_set(DMA_SLAVE, dma_mask); + + memset(edma, 0, sizeof(*edma)); + edma->dev = dev; + + 
mutex_init(&edma->lock); + + filter.dma_dev = dev; + filter.direction = BIT(DMA_MEM_TO_DEV); + for (i = 0; i < NTB_EDMA_CH_NUM; i++) { + edma->chan[i] = dma_request_channel(dma_mask, + ntb_dw_edma_filter_fn, + &filter); + if (!edma->chan[i]) + break; + edma->num_chans++; + + if (remote) + rc = dw_edma_chan_irq_config(edma->chan[i], + DW_EDMA_CH_IRQ_REMOTE); + else + rc = dw_edma_chan_irq_config(edma->chan[i], + DW_EDMA_CH_IRQ_LOCAL); + + if (rc) { + dev_err(dev, "irq config failed on ch%u: %d\n", i, rc); + goto err; + } + } + + if (!edma->num_chans) { + dev_warn(dev, "Remote eDMA channels failed to initialize\n"); + ntb_dw_edma_tx_chans_deinit(edma); + return -ENODEV; + } + + if (remote) { + rc = ntb_dw_edma_setup_intr_chan(dev, edma, priv); + if (rc) + goto err; + } + return 0; +err: + ntb_dw_edma_tx_chans_deinit(edma); + return rc; +} + +static int ntb_dw_edma_notify_peer(struct ntb_edma_chans *edma, void *priv, + int qp_num) +{ + struct ntb_edma_ctx *ctx = priv; + struct dma_async_tx_descriptor *txd; + dma_cookie_t cookie; + + if (!edma || !edma->intr_chan) + return -ENXIO; + + if (qp_num < 0 || qp_num >= ctx->qp_count) + return -EINVAL; + + if (!ctx->db_io) + return -EINVAL; + + guard(mutex)(&edma->lock); + + writel(1, &ctx->db_io->db[qp_num]); + + /* Ensure store is visible before kicking the DMA transfer */ + wmb(); + + txd = dmaengine_prep_slave_sg(edma->intr_chan, &ctx->sgl, 1, + DMA_MEM_TO_DEV, + DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + if (!txd) + return -ENOSPC; + + cookie = dmaengine_submit(txd); + if (dma_submit_error(cookie)) + return -ENOSPC; + + dma_async_issue_pending(edma->intr_chan); + return 0; +} + +static const struct ntb_edma_backend_ops ntb_dw_edma_backend_ops = { + .match = ntb_dw_edma_match, + .alloc = ntb_dw_edma_alloc, + .free = ntb_dw_edma_free, + + .ep_publish = ntb_dw_edma_ep_publish, + .ep_unpublish = ntb_dw_edma_ep_unpublish, + .rc_connect = ntb_dw_edma_rc_connect, + .rc_disconnect = ntb_dw_edma_rc_disconnect, + + .tx_chans_init = ntb_dw_edma_tx_chans_init, + .tx_chans_deinit = ntb_dw_edma_tx_chans_deinit, + .notify_peer = ntb_dw_edma_notify_peer, +}; + +static struct ntb_edma_backend ntb_dw_edma_backend = { + .name = "dw-edma", + .ops = &ntb_dw_edma_backend_ops, + .owner = THIS_MODULE, +}; + +static int __init ntb_dw_edma_init(void) +{ + return ntb_edma_backend_register(&ntb_dw_edma_backend); +} +module_init(ntb_dw_edma_init); + +static void __exit ntb_dw_edma_exit(void) +{ + ntb_edma_backend_unregister(&ntb_dw_edma_backend); +} +module_exit(ntb_dw_edma_exit); + +MODULE_DESCRIPTION("NTB DW EPC eDMA backend"); +MODULE_LICENSE("Dual BSD/GPL"); -- 2.51.0
