From: Jie Liu <[email protected]>

During memory hotplug events, the SXE2 driver needs to track memory
segment layout changes to keep its internal DMA mappings up to date.
However, the existing memseg walk functions (e.g. rte_memseg_walk)
acquire the memory lock themselves, so calling them from within a
memory event callback can lead to a deadlock.

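This patch registers a memory event callback (sxe2_common_mem_event_cb)
that maps newly allocated segments and unmaps freed segments for every
common device, and maps the already existing memsegs at device setup
through rte_memseg_walk_thread_unsafe() while holding the memory read
lock. The walk callback follows the standard rte_memseg_walk_t
prototype, processing each memseg to update driver-specific DMA
mappings.

While here, introduce SXE2_CMD_FD_INVALID for the invalid command fd
value and set the return code to -ENOMEM when the PCI ID table
allocation fails.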

Signed-off-by: Jie Liu <[email protected]>
---
 drivers/common/sxe2/sxe2_common.c     | 110 ++++++++++++++++++++++++++
 drivers/common/sxe2/sxe2_common.h     |   2 +
 drivers/common/sxe2/sxe2_ioctl_chnl.c |   2 +-
 3 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/drivers/common/sxe2/sxe2_common.c b/drivers/common/sxe2/sxe2_common.c
index c000a55cd0..5c5db85f29 100644
--- a/drivers/common/sxe2/sxe2_common.c
+++ b/drivers/common/sxe2/sxe2_common.c
@@ -196,6 +196,102 @@ static int32_t sxe2_parse_representor(const char *key, const char *value, void *
 
        PMD_LOG_INFO(COM, "representor arg %s: \"%s\".", key, value);
 
+l_end:
+       return ret;
+}
+static int32_t sxe2_dma_mem_map(struct sxe2_common_device *cdev,
+                               const void *addr, size_t len, bool do_map)
+{
+       struct rte_memseg_list *msl;
+       struct rte_memseg *ms;
+       size_t cur_len = 0;
+       int32_t ret = 0;
+
+       msl = rte_mem_virt2memseg_list(addr);
+       if (msl == NULL) {
+               ret = -EINVAL;
+               PMD_LOG_ERR(COM, "Invalid virt addr=%p.", addr);
+               goto l_end;
+       }
+
+       if ((uintptr_t)addr != RTE_ALIGN((uintptr_t)addr, msl->page_sz) ||
+               (len != RTE_ALIGN(len, msl->page_sz))) {
+               ret = -EINVAL;
+               PMD_LOG_ERR(COM, "Addr=%p and len=%zu not align page size=%" PRIu64 ".",
+                           addr, len, msl->page_sz);
+               goto l_end;
+       }
+
+       /* memsegs are contiguous in memory */
+       ms = rte_mem_virt2memseg(addr, msl);
+       while (cur_len < len) {
+               /* some memory segments may have invalid IOVA */
+               if (ms->iova == RTE_BAD_IOVA) {
+                       PMD_LOG_WARN(COM, "Memory segment at %p has bad IOVA, skipping.",
+                                       ms->addr);
+                       goto next;
+               }
+               if (do_map)
+                       sxe2_drv_dev_dma_map(cdev, ms->addr_64,
+                                       ms->iova, ms->len);
+               else
+                       sxe2_drv_dev_dma_unmap(cdev, ms->iova);
+
+next:
+               cur_len += ms->len;
+               ++ms;
+       }
+
+l_end:
+       return ret;
+}
+
+RTE_EXPORT_INTERNAL_SYMBOL(sxe2_common_mem_event_cb)
+void
+sxe2_common_mem_event_cb(enum rte_mem_event type,
+               const void *addr, size_t size, void *arg __rte_unused)
+{
+       struct sxe2_common_device *cdev = NULL;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               goto l_end;
+
+       pthread_mutex_lock(&sxe2_common_devices_list_lock);
+       switch (type) {
+       case RTE_MEM_EVENT_FREE:
+               TAILQ_FOREACH(cdev, &sxe2_common_devices_list, next)
+                       (void)sxe2_dma_mem_map(cdev, addr, size, 0);
+               break;
+       case RTE_MEM_EVENT_ALLOC:
+               TAILQ_FOREACH(cdev, &sxe2_common_devices_list, next)
+                       (void)sxe2_dma_mem_map(cdev, addr, size, 1);
+               break;
+       default:
+               break;
+       }
+       pthread_mutex_unlock(&sxe2_common_devices_list_lock);
+l_end:
+       return;
+}
+
+static int32_t sxe2_memseg_walk_cb(const struct rte_memseg_list *msl,
+                                  const struct rte_memseg *ms, void *arg)
+{
+       struct sxe2_common_device *cdev = arg;
+       int32_t ret = 0;
+
+       if (msl->external && !msl->heap)
+               goto l_end;
+
+       if (ms->iova == RTE_BAD_IOVA)
+               goto l_end;
+
+       ret = sxe2_drv_dev_dma_map(cdev, ms->addr_64, ms->iova, ms->len);
+       if (ret != 0) {
+               PMD_LOG_ERR(COM, "Fail to memseg dma map.");
+               goto l_end;
+       }
+
 l_end:
        return ret;
 }
@@ -220,6 +316,18 @@ static int32_t sxe2_common_device_setup(struct sxe2_common_device *cdev)
                goto l_close_dev;
        }
 
+       rte_mcfg_mem_read_lock();
+       ret = rte_memseg_walk_thread_unsafe(sxe2_memseg_walk_cb, cdev);
+       if (ret) {
+               PMD_LOG_ERR(COM, "Fail to walk memseg, ret=%d", ret);
+               rte_mcfg_mem_read_unlock();
+               goto l_close_dev;
+       }
+       rte_mcfg_mem_read_unlock();
+
+       (void)rte_mem_event_callback_register("SXE2_MEM_EVENT_CB",
+                       sxe2_common_mem_event_cb, NULL);
+
        goto l_end;
 
 l_close_dev:
@@ -251,6 +359,7 @@ static struct sxe2_common_device *sxe2_common_device_alloc(
        }
        cdev->dev = rte_dev;
        cdev->class_type = class_type;
+       cdev->config.cmd_fd = SXE2_CMD_FD_INVALID;
        cdev->config.kernel_reset = false;
        pthread_mutex_init(&cdev->config.lock, NULL);
 
@@ -631,6 +740,7 @@ static int32_t sxe2_common_pci_id_table_update(const struct rte_pci_id *id_table
 
        updated_table = calloc(num_ids, sizeof(*updated_table));
        if (!updated_table) {
+               ret = -ENOMEM;
                PMD_LOG_ERR(COM, "Failed to allocate memory for PCI ID table");
                goto l_end;
        }
diff --git a/drivers/common/sxe2/sxe2_common.h b/drivers/common/sxe2/sxe2_common.h
index b02b6317da..efc8d3585a 100644
--- a/drivers/common/sxe2/sxe2_common.h
+++ b/drivers/common/sxe2/sxe2_common.h
@@ -14,6 +14,8 @@
 
 #define SXE2_COMMON_PCI_DRIVER_NAME "sxe2_pci"
 
+#define SXE2_CMD_FD_INVALID (-1)
+
 #define SXE2_CDEV_TO_CMD_FD(cdev) \
        ((cdev)->config.cmd_fd)
 
diff --git a/drivers/common/sxe2/sxe2_ioctl_chnl.c b/drivers/common/sxe2/sxe2_ioctl_chnl.c
index 173d8d57ae..a233a78136 100644
--- a/drivers/common/sxe2/sxe2_ioctl_chnl.c
+++ b/drivers/common/sxe2/sxe2_ioctl_chnl.c
@@ -110,7 +110,7 @@ sxe2_drv_dev_close(struct sxe2_common_device *cdev)
        if (fd >= 0)
                close(fd);
        PMD_LOG_INFO(COM, "closed device fd=%d", fd);
-       SXE2_CDEV_TO_CMD_FD(cdev) = -1;
+       SXE2_CDEV_TO_CMD_FD(cdev) = SXE2_CMD_FD_INVALID;
 }
 
 RTE_EXPORT_INTERNAL_SYMBOL(sxe2_drv_dev_handshake)
-- 
2.52.0

Reply via email to