[PATCH v5 0/8] vdpa/ifc: add multi queue support
v5: fix some commit messages. rework some code logic. v4: fix some commit messages. add some comments to code. fix some code to reduce confusion. v3: rename device ID macro name. fix some patch titles and commit messages. delete some unused macros. rework some code logic. v2: fix some coding style issues. support dynamic enable/disable queue at run time. Andy Pei (6): vdpa/ifc: add multi-queue support vdpa/ifc: set max queues based on virtio spec vdpa/ifc: write queue count to MQ register vdpa/ifc: only configure enabled queue vhost: vDPA blk device gets ready when the first queue is ready vhost: improve vDPA blk device configure condition Huang Wei (2): vdpa/ifc: add new device ID for legacy network device vdpa/ifc: support dynamic enable/disable queue drivers/vdpa/ifc/base/ifcvf.c | 144 ++ drivers/vdpa/ifc/base/ifcvf.h | 16 - drivers/vdpa/ifc/ifcvf_vdpa.c | 142 +++-- lib/vhost/vhost_user.c| 44 +++-- 4 files changed, 315 insertions(+), 31 deletions(-) -- 1.8.3.1
[PATCH v5 1/8] vdpa/ifc: add new device ID for legacy network device
From: Huang Wei Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000). Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID". Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/base/ifcvf.h | 6 -- drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 9d95aac..ef7697a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -12,12 +12,14 @@ #define IFCVF_BLK 1 #define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_NET_DEVICE_ID 0x1041 +#define IFCVF_NET_MODERN_DEVICE_ID 0x1041 #define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 #define IFCVF_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_BLK_DEVICE_ID 0x0002 +#define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 +#define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 #define IFCVF_MAX_QUEUES 1 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d5ac583..b4389a0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = { static const struct rte_pci_id pci_id_ifcvf_map[] = { { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, - .device_id = IFCVF_NET_DEVICE_ID, + .device_id = IFCVF_NET_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, + .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID, + .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, + .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID, + }, + + { .class_id = RTE_CLASS_ANY_ID, + .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - 
.subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - .subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .vendor_id = 0, /* sentinel */ -- 1.8.3.1
[PATCH v5 2/8] vdpa/ifc: add multi-queue support
Enable VHOST_USER_PROTOCOL_F_MQ feature. Expose IFCVF_MQ_OFFSET register to enable multi-queue. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 9 + drivers/vdpa/ifc/base/ifcvf.h | 2 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index f1e1474..81c68c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -90,6 +90,15 @@ if (!hw->lm_cfg) WARNINGOUT("HW support live migration not support!\n"); + /* For some hardware implementation, for example: +* the BAR 4 of PF is NULL, while BAR 4 of VF is not. +* This code makes sure hw->mq_cfg is a valid address. +*/ + if (hw->mem_resource[4].addr) + hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET; + else + hw->mq_cfg = NULL; + if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { DEBUGOUT("capability incomplete\n"); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index ef7697a..d16d9ab 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_CFG_SIZE 0x40 #define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_MQ_OFFSET0x28 #define IFCVF_LM_LOGGING_CTRL 0x0 @@ -149,6 +150,7 @@ struct ifcvf_hw { u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; u8 *lm_cfg; + u8 *mq_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; int device_type; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index b4389a0..008cf89 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \ 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ +1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) #define 
VDPA_BLK_PROTOCOL_FEATURES \ -- 1.8.3.1
[PATCH v5 3/8] vdpa/ifc: set max queues based on virtio spec
Set max_queues according to virtio spec. For virtio BLK device, set max_queues to the value of num_queues in struct virtio_blk_config. For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg, get the queue pair number using num_queues and set max_queues to it. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.h | 2 +- drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index d16d9ab..1e133c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -21,7 +21,7 @@ #define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 #define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 -#define IFCVF_MAX_QUEUES 1 +#define IFCVF_MAX_QUEUES 32 #ifndef VIRTIO_F_IOMMU_PLATFORM #define VIRTIO_F_IOMMU_PLATFORM33 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 008cf89..5a24204 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -26,6 +26,12 @@ #include "base/ifcvf.h" +/* + * RTE_MIN() cannot be used since braced-group within expression allowed + * only inside a function. + */ +#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2)) + RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE); #define DRV_LOG(level, fmt, args...) 
\ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ @@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = { uint64_t capacity = 0; uint8_t *byte; uint32_t i; + uint16_t queue_pairs; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->configured = 0; - internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); device_id = ifcvf_pci_get_device_type(pci_dev); @@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->hw.device_type = IFCVF_NET; + /* +* ifc device always has CTRL_VQ, +* and supports VIRTIO_NET_F_CTRL_VQ feature. +*/ + queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2; + DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name, + queue_pairs); + internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs); internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; @@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(DEBUG, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + internal->max_queues = MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v5 4/8] vdpa/ifc: write queue count to MQ register
Write queue count to IFCVF_MQ_OFFSET register to enable multi-queue feature. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 81c68c0..b377126 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -202,6 +202,37 @@ IFCVF_WRITE_REG32(val >> 32, hi); } +STATIC void +ifcvf_enable_mq(struct ifcvf_hw *hw) +{ + u8 *mq_cfg; + u8 qid; + int nr_queue = 0; + + for (qid = 0; qid < hw->nr_vring; qid++) { + if (!hw->vring[qid].enable) + continue; + nr_queue++; + } + + if (nr_queue == 0) { + WARNINGOUT("no enabled vring\n"); + return; + } + + mq_cfg = hw->mq_cfg; + if (mq_cfg) { + if (hw->device_type == IFCVF_BLK) { + *(u32 *)mq_cfg = nr_queue; + RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue); + } else { + *(u32 *)mq_cfg = nr_queue / 2; + RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n", + nr_queue / 2); + } + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { @@ -219,6 +250,7 @@ return -1; } + ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, -- 1.8.3.1
[PATCH v5 6/8] vdpa/ifc: support dynamic enable/disable queue
From: Huang Wei Support dynamic enable or disable queue. For front end, like QEMU, user can use ethtool to configure queue. For example, "ethtool -L eth0 combined 3" to enable 3 queues pairs. Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 100 ++ drivers/vdpa/ifc/base/ifcvf.h | 6 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 93 --- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 30bb8cb..869ddd6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -233,6 +233,106 @@ } } +int +ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u8 *lm_cfg; + u16 notify_off; + int msix_vector; + + if (i >= (int)hw->nr_vring) + return -1; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return -1; + } + + ifcvf_enable_mq(hw); + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector != (i + 1)) { + IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector == IFCVF_MSI_NO_VECTOR) { + RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n", + i); + return -1; + } + } + + io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + 
(u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } + + notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off); + hw->notify_addr[i] = (void *)((u8 *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + IFCVF_WRITE_REG16(1, &cfg->queue_enable); + + return 0; +} + +void +ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u32 ring_state; + u8 *lm_cfg; + + if (i >= (int)hw->nr_vring) + return; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return; + } + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + IFCVF_WRITE_REG16(0, &cfg->queue_enable); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + } else { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + } + hw->vring[i].last_used_idx = (u16)(ring_state >> 16); + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 1e133c0..3726da7 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -164,6 +164,12 @@ struct ifcvf_hw { ifcvf_get_features(struct ifcvf_hw *hw); int +ifcvf_en
[PATCH v5 5/8] vdpa/ifc: only configure enabled queue
When configuring the hardware queue, we only configure queues which have been enabled by vhost. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index b377126..30bb8cb 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -252,6 +252,9 @@ ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a24204..0c3407a 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -284,6 +284,8 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &hw->req_features); for (i = 0; i < nr_vring; i++) { + if (!hw->vring[i].enable) + continue; rte_vhost_get_vhost_vring(vid, i, &vq); gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); if (gpa == 0) { @@ -499,6 +501,8 @@ struct rte_vdpa_dev_info { vring.kickfd = -1; for (qid = 0; qid < q_num; qid++) { + if (!hw->vring[qid].enable) + continue; ev.events = EPOLLIN | EPOLLPRI; rte_vhost_get_vhost_vring(internal->vid, qid, &vring); ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; @@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info { struct rte_vdpa_device *vdev; struct internal_list *list; struct ifcvf_internal *internal; + struct ifcvf_hw *hw; + uint16_t i; vdev = rte_vhost_get_vdpa_device(vid); list = find_internal_resource_by_vdev(vdev); @@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info { rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); - if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0) - DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", + hw = 
&internal->hw; + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0) + DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", vdev->device->name); + } internal->configured = 1; + DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name); return 0; } -- 1.8.3.1
[PATCH v5 7/8] vhost: vDPA blk device gets ready when the first queue is ready
When booting from a virtio blk device, SeaBIOS in QEMU only enables one queue. To work in this scenario, the vDPA BLK device back-end configures the device when any queue is ready. Signed-off-by: Andy Pei Signed-off-by: Huang Wei --- lib/vhost/vhost_user.c | 31 +++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index cd65257..f5206dd 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -1441,11 +1441,15 @@ } #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u +#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u static int virtio_is_ready(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; struct vhost_virtqueue *vq; + uint32_t vdpa_type; + int ret = 0; uint32_t i, nr_vring = dev->nr_vring; if (dev->flags & VIRTIO_DEV_READY) @@ -1454,13 +1458,32 @@ if (!dev->nr_vring) return 0; - if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { - nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) { + if (vdpa_dev->ops->get_dev_type) { + ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); + if (ret) { + VHOST_LOG_CONFIG(dev->ifname, ERR, + "failed to get vdpa dev type.\n"); + return -1; + } + } else { + vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + } else { + vdpa_type = -1; + } - if (dev->nr_vring < nr_vring) - return 0; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY; + } else { + if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) + nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; } + if (dev->nr_vring < nr_vring) + return 0; + for (i = 0; i < nr_vring; i++) { vq = dev->virtqueue[i]; -- 1.8.3.1
[PATCH v5 8/8] vhost: improve vDPA blk device configure condition
To support multi-queue, configure the device only after the call fds of all queues are set. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- lib/vhost/vhost_user.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index f5206dd..6b5f89a 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2984,6 +2984,7 @@ static int is_vring_iotlb(struct virtio_net *dev, uint32_t vdpa_type = 0; uint32_t request; uint32_t i; + uint16_t blk_call_fd; dev = get_device(vid); if (dev == NULL) @@ -3203,9 +3204,15 @@ static int is_vring_iotlb(struct virtio_net *dev, } else { vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; } - if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK - && request != VHOST_USER_SET_VRING_CALL) - goto out; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + if (request == VHOST_USER_SET_VRING_CALL) { + blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (blk_call_fd != dev->nr_vring - 1) + goto out; + } else { + goto out; + } + } if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) -- 1.8.3.1
[PATCH v6 0/8] vdpa/ifc: add multi queue support
v6: Add vdpa_device_type to rte_vdpa_device to store vDPA device type. v5: fix some commit messages. rework some code logic. v4: fix some commit messages. add some comments to code. fix some code to reduce confusion. v3: rename device ID macro name. fix some patch titles and commit messages. delete some unused macros. rework some code logic. v2: fix some coding style issues. support dynamic enable/disable queue at run time. Andy Pei (6): vdpa/ifc: add multi-queue support vdpa/ifc: set max queues based on virtio spec vdpa/ifc: write queue count to MQ register vdpa/ifc: only configure enabled queue vhost: vDPA blk device gets ready when the first queue is ready vhost: improve vDPA blk device configure condition Huang Wei (2): vdpa/ifc: add new device ID for legacy network device vdpa/ifc: support dynamic enable/disable queue drivers/vdpa/ifc/base/ifcvf.c | 144 ++ drivers/vdpa/ifc/base/ifcvf.h | 16 - drivers/vdpa/ifc/ifcvf_vdpa.c | 142 +++-- lib/vhost/vdpa_driver.h | 2 + lib/vhost/vhost_user.c| 62 +- 5 files changed, 325 insertions(+), 41 deletions(-) -- 1.8.3.1
[PATCH v6 1/8] vdpa/ifc: add new device ID for legacy network device
From: Huang Wei Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000). Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID". Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/base/ifcvf.h | 6 -- drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 9d95aac..ef7697a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -12,12 +12,14 @@ #define IFCVF_BLK 1 #define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_NET_DEVICE_ID 0x1041 +#define IFCVF_NET_MODERN_DEVICE_ID 0x1041 #define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 #define IFCVF_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_BLK_DEVICE_ID 0x0002 +#define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 +#define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 #define IFCVF_MAX_QUEUES 1 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d5ac583..b4389a0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = { static const struct rte_pci_id pci_id_ifcvf_map[] = { { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, - .device_id = IFCVF_NET_DEVICE_ID, + .device_id = IFCVF_NET_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, + .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID, + .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, + .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID, + }, + + { .class_id = RTE_CLASS_ANY_ID, + .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - 
.subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - .subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .vendor_id = 0, /* sentinel */ -- 1.8.3.1
[PATCH v6 2/8] vdpa/ifc: add multi-queue support
Enable VHOST_USER_PROTOCOL_F_MQ feature. Expose IFCVF_MQ_OFFSET register to enable multi-queue. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 9 + drivers/vdpa/ifc/base/ifcvf.h | 2 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index f1e1474..81c68c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -90,6 +90,15 @@ if (!hw->lm_cfg) WARNINGOUT("HW support live migration not support!\n"); + /* For some hardware implementation, for example: +* the BAR 4 of PF is NULL, while BAR 4 of VF is not. +* This code makes sure hw->mq_cfg is a valid address. +*/ + if (hw->mem_resource[4].addr) + hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET; + else + hw->mq_cfg = NULL; + if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { DEBUGOUT("capability incomplete\n"); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index ef7697a..d16d9ab 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_CFG_SIZE 0x40 #define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_MQ_OFFSET0x28 #define IFCVF_LM_LOGGING_CTRL 0x0 @@ -149,6 +150,7 @@ struct ifcvf_hw { u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; u8 *lm_cfg; + u8 *mq_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; int device_type; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index b4389a0..008cf89 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \ 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ +1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) #define 
VDPA_BLK_PROTOCOL_FEATURES \ -- 1.8.3.1
[PATCH v6 3/8] vdpa/ifc: set max queues based on virtio spec
Set max_queues according to virtio spec. For virtio BLK device, set max_queues to the value of num_queues in struct virtio_blk_config. For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg, get the queue pair number using num_queues and set max_queues to it. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.h | 2 +- drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index d16d9ab..1e133c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -21,7 +21,7 @@ #define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 #define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 -#define IFCVF_MAX_QUEUES 1 +#define IFCVF_MAX_QUEUES 32 #ifndef VIRTIO_F_IOMMU_PLATFORM #define VIRTIO_F_IOMMU_PLATFORM33 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 008cf89..5a24204 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -26,6 +26,12 @@ #include "base/ifcvf.h" +/* + * RTE_MIN() cannot be used since braced-group within expression allowed + * only inside a function. + */ +#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2)) + RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE); #define DRV_LOG(level, fmt, args...) 
\ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ @@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = { uint64_t capacity = 0; uint8_t *byte; uint32_t i; + uint16_t queue_pairs; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->configured = 0; - internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); device_id = ifcvf_pci_get_device_type(pci_dev); @@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->hw.device_type = IFCVF_NET; + /* +* ifc device always has CTRL_VQ, +* and supports VIRTIO_NET_F_CTRL_VQ feature. +*/ + queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2; + DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name, + queue_pairs); + internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs); internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; @@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(DEBUG, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + internal->max_queues = MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v6 4/8] vdpa/ifc: write queue count to MQ register
Write queue count to IFCVF_MQ_OFFSET register to enable multi-queue feature. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 81c68c0..b377126 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -202,6 +202,37 @@ IFCVF_WRITE_REG32(val >> 32, hi); } +STATIC void +ifcvf_enable_mq(struct ifcvf_hw *hw) +{ + u8 *mq_cfg; + u8 qid; + int nr_queue = 0; + + for (qid = 0; qid < hw->nr_vring; qid++) { + if (!hw->vring[qid].enable) + continue; + nr_queue++; + } + + if (nr_queue == 0) { + WARNINGOUT("no enabled vring\n"); + return; + } + + mq_cfg = hw->mq_cfg; + if (mq_cfg) { + if (hw->device_type == IFCVF_BLK) { + *(u32 *)mq_cfg = nr_queue; + RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue); + } else { + *(u32 *)mq_cfg = nr_queue / 2; + RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n", + nr_queue / 2); + } + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { @@ -219,6 +250,7 @@ return -1; } + ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, -- 1.8.3.1
[PATCH v6 5/8] vdpa/ifc: only configure enabled queue
When configuring the hardware queue, we only configure queues which have been enabled by vhost. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index b377126..30bb8cb 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -252,6 +252,9 @@ ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a24204..0c3407a 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -284,6 +284,8 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &hw->req_features); for (i = 0; i < nr_vring; i++) { + if (!hw->vring[i].enable) + continue; rte_vhost_get_vhost_vring(vid, i, &vq); gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); if (gpa == 0) { @@ -499,6 +501,8 @@ struct rte_vdpa_dev_info { vring.kickfd = -1; for (qid = 0; qid < q_num; qid++) { + if (!hw->vring[qid].enable) + continue; ev.events = EPOLLIN | EPOLLPRI; rte_vhost_get_vhost_vring(internal->vid, qid, &vring); ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; @@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info { struct rte_vdpa_device *vdev; struct internal_list *list; struct ifcvf_internal *internal; + struct ifcvf_hw *hw; + uint16_t i; vdev = rte_vhost_get_vdpa_device(vid); list = find_internal_resource_by_vdev(vdev); @@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info { rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); - if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0) - DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", + hw = 
&internal->hw; + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0) + DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", vdev->device->name); + } internal->configured = 1; + DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name); return 0; } -- 1.8.3.1
[PATCH v6 7/8] vhost: vDPA blk device gets ready when the first queue is ready
When boot from virtio blk device, seabios in QEMU only enables one queue. To work in this scenario, vDPA BLK device back-end configure device when any queue is ready. Add vdpa_device_type to rte_vdpa_device to store vDPA device type. Signed-off-by: Andy Pei Signed-off-by: Huang Wei --- lib/vhost/vdpa_driver.h | 2 ++ lib/vhost/vhost_user.c | 55 ++--- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index 8b88a53..c4ec222 100644 --- a/lib/vhost/vdpa_driver.h +++ b/lib/vhost/vdpa_driver.h @@ -92,6 +92,8 @@ struct rte_vdpa_device { struct rte_device *device; /** vdpa device operations */ struct rte_vdpa_dev_ops *ops; + /** vdpa device type: net, blk... */ + uint32_t vdpa_device_type; }; /** diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index cd65257..53806fa 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -1441,11 +1441,14 @@ } #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u +#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u static int virtio_is_ready(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; struct vhost_virtqueue *vq; + uint32_t vdpa_type; uint32_t i, nr_vring = dev->nr_vring; if (dev->flags & VIRTIO_DEV_READY) @@ -1454,13 +1457,22 @@ if (!dev->nr_vring) return 0; - if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { - nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) + vdpa_type = vdpa_dev->vdpa_device_type; + else + vdpa_type = -1; - if (dev->nr_vring < nr_vring) - return 0; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY; + } else { + if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) + nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; } + if (dev->nr_vring < nr_vring) + return 0; + for (i = 0; i < nr_vring; i++) { vq = dev->virtqueue[i]; @@ -2958,7 +2970,7 @@ static int is_vring_iotlb(struct virtio_net *dev, int ret; int unlock_required = 0; bool handled; - uint32_t 
vdpa_type = 0; + uint32_t vdpa_type = -1; uint32_t request; uint32_t i; @@ -3152,7 +3164,27 @@ static int is_vring_iotlb(struct virtio_net *dev, if (unlock_required) vhost_user_unlock_all_queue_pairs(dev); - if (ret != 0 || !virtio_is_ready(dev)) + if (ret != 0) + goto out; + + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) { + if (vdpa_dev->ops->get_dev_type) { + ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); + if (ret) { + VHOST_LOG_CONFIG(dev->ifname, ERR, + "failed to get vdpa dev type.\n"); + ret = -1; + goto out; + } + } else { + vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + + vdpa_dev->vdpa_device_type = vdpa_type; + } + + if (!virtio_is_ready(dev)) goto out; /* @@ -3166,20 +3198,9 @@ static int is_vring_iotlb(struct virtio_net *dev, dev->flags |= VIRTIO_DEV_RUNNING; } - vdpa_dev = dev->vdpa_dev; if (!vdpa_dev) goto out; - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa dev type.\n"); - ret = -1; - goto out; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK && request != VHOST_USER_SET_VRING_CALL) goto out; -- 1.8.3.1
[PATCH v6 6/8] vdpa/ifc: support dynamic enable/disable queue
From: Huang Wei Support dynamic enable or disable queue. For front end, like QEMU, user can use ethtool to configure queue. For example, "ethtool -L eth0 combined 3" to enable 3 queues pairs. Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 100 ++ drivers/vdpa/ifc/base/ifcvf.h | 6 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 93 --- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 30bb8cb..869ddd6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -233,6 +233,106 @@ } } +int +ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u8 *lm_cfg; + u16 notify_off; + int msix_vector; + + if (i >= (int)hw->nr_vring) + return -1; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return -1; + } + + ifcvf_enable_mq(hw); + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector != (i + 1)) { + IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector == IFCVF_MSI_NO_VECTOR) { + RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n", + i); + return -1; + } + } + + io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + 
(u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } + + notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off); + hw->notify_addr[i] = (void *)((u8 *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + IFCVF_WRITE_REG16(1, &cfg->queue_enable); + + return 0; +} + +void +ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u32 ring_state; + u8 *lm_cfg; + + if (i >= (int)hw->nr_vring) + return; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return; + } + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + IFCVF_WRITE_REG16(0, &cfg->queue_enable); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + } else { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + } + hw->vring[i].last_used_idx = (u16)(ring_state >> 16); + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 1e133c0..3726da7 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -164,6 +164,12 @@ struct ifcvf_hw { ifcvf_get_features(struct ifcvf_hw *hw); int +ifcvf_en
[PATCH v6 8/8] vhost: improve vDPA blk device configure condition
To support multi-queue, configure the device after the call fds of all queues are set. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- lib/vhost/vhost_user.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 53806fa..2c50d13 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2973,6 +2973,7 @@ static int is_vring_iotlb(struct virtio_net *dev, uint32_t vdpa_type = -1; uint32_t request; uint32_t i; + uint16_t blk_call_fd; dev = get_device(vid); if (dev == NULL) @@ -3201,9 +3202,15 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK - && request != VHOST_USER_SET_VRING_CALL) - goto out; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + if (request == VHOST_USER_SET_VRING_CALL) { + blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (blk_call_fd != dev->nr_vring - 1) + goto out; + } else { + goto out; + } + } if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) -- 1.8.3.1
[PATCH v7 00/12] vdpa/ifc: add multi queue support
v7: Fill vdpa_device_type in vdpa device registration. v6: Add vdpa_device_type to rte_vdpa_device to store vDPA device type. v5: fix some commit messages. rework some code logic. v4: fix some commit messages. add some comments to code. fix some code to reduce confusion. v3: rename device ID macro name. fix some patch titles and commit messages. delete some unused macros. rework some code logic. v2: fix some coding style issues. support dynamic enable/disable queue at run time. Andy Pei (10): vdpa/ifc: add multi-queue support vdpa/ifc: set max queues based on virtio spec vdpa/ifc: write queue count to MQ register vdpa/ifc: only configure enabled queue vdpa/ifc: change internal function name vdpa/ifc: add internal API to get device. vdpa/ifc: change some driver logic vhost: add vdpa device type to rte vdpa device vhost: vDPA blk device gets ready when the first queue is ready vhost: improve vDPA blk device configure condition Huang Wei (2): vdpa/ifc: add new device ID for legacy network device vdpa/ifc: support dynamic enable/disable queue drivers/vdpa/ifc/base/ifcvf.c | 144 drivers/vdpa/ifc/base/ifcvf.h | 16 +++- drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++--- lib/vhost/socket.c| 15 +--- lib/vhost/vdpa.c | 17 lib/vhost/vdpa_driver.h | 2 + lib/vhost/vhost_user.c| 40 + 7 files changed, 358 insertions(+), 61 deletions(-) -- 1.8.3.1
[PATCH v7 01/12] vdpa/ifc: add new device ID for legacy network device
From: Huang Wei Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000). Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID". Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/base/ifcvf.h | 6 -- drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 9d95aac..ef7697a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -12,12 +12,14 @@ #define IFCVF_BLK 1 #define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_NET_DEVICE_ID 0x1041 +#define IFCVF_NET_MODERN_DEVICE_ID 0x1041 #define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 #define IFCVF_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_BLK_DEVICE_ID 0x0002 +#define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 +#define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 #define IFCVF_MAX_QUEUES 1 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d5ac583..b4389a0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = { static const struct rte_pci_id pci_id_ifcvf_map[] = { { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, - .device_id = IFCVF_NET_DEVICE_ID, + .device_id = IFCVF_NET_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, + .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID, + .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, + .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID, + }, + + { .class_id = RTE_CLASS_ANY_ID, + .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - 
.subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - .subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .vendor_id = 0, /* sentinel */ -- 1.8.3.1
[PATCH v7 02/12] vdpa/ifc: add multi-queue support
Enable VHOST_USER_PROTOCOL_F_MQ feature. Expose IFCVF_MQ_OFFSET register to enable multi-queue. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 9 + drivers/vdpa/ifc/base/ifcvf.h | 2 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index f1e1474..81c68c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -90,6 +90,15 @@ if (!hw->lm_cfg) WARNINGOUT("HW support live migration not support!\n"); + /* For some hardware implementation, for example: +* the BAR 4 of PF is NULL, while BAR 4 of VF is not. +* This code makes sure hw->mq_cfg is a valid address. +*/ + if (hw->mem_resource[4].addr) + hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET; + else + hw->mq_cfg = NULL; + if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { DEBUGOUT("capability incomplete\n"); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index ef7697a..d16d9ab 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_CFG_SIZE 0x40 #define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_MQ_OFFSET0x28 #define IFCVF_LM_LOGGING_CTRL 0x0 @@ -149,6 +150,7 @@ struct ifcvf_hw { u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; u8 *lm_cfg; + u8 *mq_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; int device_type; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index b4389a0..008cf89 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \ 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ +1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) #define 
VDPA_BLK_PROTOCOL_FEATURES \ -- 1.8.3.1
[PATCH v7 03/12] vdpa/ifc: set max queues based on virtio spec
Set max_queues according to virtio spec. For virtio BLK device, set max_queues to the value of num_queues in struct virtio_blk_config. For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg, get the queue pair number using num_queues and set max_queues to it. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.h | 2 +- drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index d16d9ab..1e133c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -21,7 +21,7 @@ #define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 #define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 -#define IFCVF_MAX_QUEUES 1 +#define IFCVF_MAX_QUEUES 32 #ifndef VIRTIO_F_IOMMU_PLATFORM #define VIRTIO_F_IOMMU_PLATFORM33 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 008cf89..5a24204 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -26,6 +26,12 @@ #include "base/ifcvf.h" +/* + * RTE_MIN() cannot be used since braced-group within expression allowed + * only inside a function. + */ +#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2)) + RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE); #define DRV_LOG(level, fmt, args...) 
\ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ @@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = { uint64_t capacity = 0; uint8_t *byte; uint32_t i; + uint16_t queue_pairs; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->configured = 0; - internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); device_id = ifcvf_pci_get_device_type(pci_dev); @@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->hw.device_type = IFCVF_NET; + /* +* ifc device always has CTRL_VQ, +* and supports VIRTIO_NET_F_CTRL_VQ feature. +*/ + queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2; + DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name, + queue_pairs); + internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs); internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; @@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(DEBUG, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + internal->max_queues = MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v7 04/12] vdpa/ifc: write queue count to MQ register
Write queue count to IFCVF_MQ_OFFSET register to enable multi-queue feature. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 81c68c0..b377126 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -202,6 +202,37 @@ IFCVF_WRITE_REG32(val >> 32, hi); } +STATIC void +ifcvf_enable_mq(struct ifcvf_hw *hw) +{ + u8 *mq_cfg; + u8 qid; + int nr_queue = 0; + + for (qid = 0; qid < hw->nr_vring; qid++) { + if (!hw->vring[qid].enable) + continue; + nr_queue++; + } + + if (nr_queue == 0) { + WARNINGOUT("no enabled vring\n"); + return; + } + + mq_cfg = hw->mq_cfg; + if (mq_cfg) { + if (hw->device_type == IFCVF_BLK) { + *(u32 *)mq_cfg = nr_queue; + RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue); + } else { + *(u32 *)mq_cfg = nr_queue / 2; + RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n", + nr_queue / 2); + } + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { @@ -219,6 +250,7 @@ return -1; } + ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, -- 1.8.3.1
[PATCH v7 05/12] vdpa/ifc: only configure enabled queue
When configuring the hardware queue, we only configure queues which have been enabled by vhost. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index b377126..30bb8cb 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -252,6 +252,9 @@ ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a24204..0c3407a 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -284,6 +284,8 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &hw->req_features); for (i = 0; i < nr_vring; i++) { + if (!hw->vring[i].enable) + continue; rte_vhost_get_vhost_vring(vid, i, &vq); gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); if (gpa == 0) { @@ -499,6 +501,8 @@ struct rte_vdpa_dev_info { vring.kickfd = -1; for (qid = 0; qid < q_num; qid++) { + if (!hw->vring[qid].enable) + continue; ev.events = EPOLLIN | EPOLLPRI; rte_vhost_get_vhost_vring(internal->vid, qid, &vring); ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; @@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info { struct rte_vdpa_device *vdev; struct internal_list *list; struct ifcvf_internal *internal; + struct ifcvf_hw *hw; + uint16_t i; vdev = rte_vhost_get_vdpa_device(vid); list = find_internal_resource_by_vdev(vdev); @@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info { rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); - if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0) - DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", + hw = 
&internal->hw; + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0) + DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", vdev->device->name); + } internal->configured = 1; + DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name); return 0; } -- 1.8.3.1
[PATCH v7 06/12] vdpa/ifc: support dynamic enable/disable queue
From: Huang Wei Support dynamic enable or disable queue. For front end, like QEMU, user can use ethtool to configure queue. For example, "ethtool -L eth0 combined 3" to enable 3 queues pairs. Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 100 ++ drivers/vdpa/ifc/base/ifcvf.h | 6 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 93 --- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 30bb8cb..869ddd6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -233,6 +233,106 @@ } } +int +ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u8 *lm_cfg; + u16 notify_off; + int msix_vector; + + if (i >= (int)hw->nr_vring) + return -1; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return -1; + } + + ifcvf_enable_mq(hw); + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector != (i + 1)) { + IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector == IFCVF_MSI_NO_VECTOR) { + RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n", + i); + return -1; + } + } + + io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + 
(u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } + + notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off); + hw->notify_addr[i] = (void *)((u8 *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + IFCVF_WRITE_REG16(1, &cfg->queue_enable); + + return 0; +} + +void +ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u32 ring_state; + u8 *lm_cfg; + + if (i >= (int)hw->nr_vring) + return; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return; + } + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + IFCVF_WRITE_REG16(0, &cfg->queue_enable); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + } else { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + } + hw->vring[i].last_used_idx = (u16)(ring_state >> 16); + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 1e133c0..3726da7 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -164,6 +164,12 @@ struct ifcvf_hw { ifcvf_get_features(struct ifcvf_hw *hw); int +ifcvf_en
[PATCH v7 07/12] vdpa/ifc: change internal function name
Change internal function name "find_internal_resource_by_dev" to "find_internal_resource_by_pci_dev". Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 9c49f9c..73d04ed 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -121,7 +121,7 @@ struct rte_vdpa_dev_info { } static struct internal_list * -find_internal_resource_by_dev(struct rte_pci_device *pdev) +find_internal_resource_by_pci_dev(struct rte_pci_device *pdev) { int found = 0; struct internal_list *list; @@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = { if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - list = find_internal_resource_by_dev(pci_dev); + list = find_internal_resource_by_pci_dev(pci_dev); if (list == NULL) { DRV_LOG(ERR, "Invalid device: %s", pci_dev->name); return -1; -- 1.8.3.1
[PATCH v7 08/12] vdpa/ifc: add internal API to get device
Add new internal API "find_internal_resource_by_rte_dev" to get device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 73d04ed..c16e263 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -144,6 +144,29 @@ struct rte_vdpa_dev_info { return list; } +static struct internal_list * +find_internal_resource_by_rte_dev(struct rte_device *rte_dev) +{ + int found = 0; + struct internal_list *list; + + pthread_mutex_lock(&internal_list_lock); + + TAILQ_FOREACH(list, &internal_list, next) { + if (rte_dev == &list->internal->pdev->device) { + found = 1; + break; + } + } + + pthread_mutex_unlock(&internal_list_lock); + + if (!found) + return NULL; + + return list; +} + static int ifcvf_vfio_setup(struct ifcvf_internal *internal) { @@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info { { struct ifcvf_internal *internal; struct internal_list *list; + struct rte_device *rte_dev = vdev->device; - list = find_internal_resource_by_vdev(vdev); + list = find_internal_resource_by_rte_dev(rte_dev); if (list == NULL) { - DRV_LOG(ERR, "Invalid vDPA device: %p", vdev); + DRV_LOG(ERR, "Invalid rte device: %p", rte_dev); return -1; } -- 1.8.3.1
[PATCH v7 09/12] vdpa/ifc: change some driver logic
Insert the internal list element into the internal list before registering the vdpa device, so that vdpa ops can be called during vdpa device registration. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index c16e263..8dfd493 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->sw_lm = sw_fallback_lm; + pthread_mutex_lock(&internal_list_lock); + TAILQ_INSERT_TAIL(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + internal->vdev = rte_vdpa_register_device(&pci_dev->device, dev_info[internal->hw.device_type].ops); if (internal->vdev == NULL) { DRV_LOG(ERR, "failed to register device %s", pci_dev->name); + pthread_mutex_lock(&internal_list_lock); + TAILQ_REMOVE(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); goto error; } - pthread_mutex_lock(&internal_list_lock); - TAILQ_INSERT_TAIL(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - rte_atomic32_set(&internal->started, 1); update_datapath(internal); -- 1.8.3.1
[PATCH v7 10/12] vhost: add vdpa device type to rte vdpa device
Add vdpa_device_type to rte_vdpa_device to store device type. Call vdpa ops get_dev_type to fill vdpa_device_type when register vdpa device. Signed-off-by: Andy Pei --- lib/vhost/socket.c | 15 +-- lib/vhost/vdpa.c| 17 + lib/vhost/vdpa_driver.h | 2 ++ 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index 608ae57..f768114 100644 --- a/lib/vhost/socket.c +++ b/lib/vhost/socket.c @@ -627,7 +627,6 @@ struct rte_vdpa_device * { struct vhost_user_socket *vsocket; struct rte_vdpa_device *vdpa_dev; - uint32_t vdpa_type = 0; int ret = 0; pthread_mutex_lock(&vhost_user.mutex); @@ -644,19 +643,7 @@ struct rte_vdpa_device * goto unlock_exit; } - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(path, ERR, - "failed to get vdpa dev type for socket file.\n"); - ret = -1; - goto unlock_exit; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } - - *type = vdpa_type; + *type = vdpa_dev->vdpa_device_type; unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c index bb82857..b487f4d 100644 --- a/lib/vhost/vdpa.c +++ b/lib/vhost/vdpa.c @@ -73,6 +73,8 @@ struct rte_vdpa_device * struct rte_vdpa_dev_ops *ops) { struct rte_vdpa_device *dev; + uint32_t vdpa_type = -1; + int ret = 0; if (ops == NULL) return NULL; @@ -101,6 +103,21 @@ struct rte_vdpa_device * dev->device = rte_dev; dev->ops = ops; + + if (ops->get_dev_type) { + ret = ops->get_dev_type(dev, &vdpa_type); + if (ret) { + VHOST_LOG_CONFIG(rte_dev->name, ERR, +"Failed to get vdpa dev type.\n"); + ret = -1; + goto out_unlock; + } + } else { + /** by default, we assume vdpa device is a net device */ + vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + dev->vdpa_device_type = vdpa_type; + TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next); out_unlock: rte_spinlock_unlock(&vdpa_device_list_lock); diff --git a/lib/vhost/vdpa_driver.h 
b/lib/vhost/vdpa_driver.h index 8b88a53..c4ec222 100644 --- a/lib/vhost/vdpa_driver.h +++ b/lib/vhost/vdpa_driver.h @@ -92,6 +92,8 @@ struct rte_vdpa_device { struct rte_device *device; /** vdpa device operations */ struct rte_vdpa_dev_ops *ops; + /** vdpa device type: net, blk... */ + uint32_t vdpa_device_type; }; /** -- 1.8.3.1
[PATCH v7 11/12] vhost: vDPA blk device gets ready when the first queue is ready
When boot from virtio blk device, seabios in QEMU only enables one queue. To work in this scenario, vDPA BLK device back-end configure device when the first queue is ready. Signed-off-by: Andy Pei Signed-off-by: Huang Wei --- lib/vhost/vhost_user.c | 33 ++--- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index cd65257..d5dbd9b 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -1441,11 +1441,14 @@ } #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u +#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u static int virtio_is_ready(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; struct vhost_virtqueue *vq; + uint32_t vdpa_type; uint32_t i, nr_vring = dev->nr_vring; if (dev->flags & VIRTIO_DEV_READY) @@ -1454,13 +1457,22 @@ if (!dev->nr_vring) return 0; - if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { - nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) + vdpa_type = vdpa_dev->vdpa_device_type; + else + vdpa_type = -1; - if (dev->nr_vring < nr_vring) - return 0; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY; + } else { + if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) + nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; } + if (dev->nr_vring < nr_vring) + return 0; + for (i = 0; i < nr_vring; i++) { vq = dev->virtqueue[i]; @@ -2958,7 +2970,7 @@ static int is_vring_iotlb(struct virtio_net *dev, int ret; int unlock_required = 0; bool handled; - uint32_t vdpa_type = 0; + uint32_t vdpa_type; uint32_t request; uint32_t i; @@ -3170,16 +3182,7 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa dev type.\n"); - ret = -1; - goto out; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } + vdpa_type = 
vdpa_dev->vdpa_device_type; if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK && request != VHOST_USER_SET_VRING_CALL) goto out; -- 1.8.3.1
[PATCH v7 12/12] vhost: improve vDPA blk device configure condition
To support multi-queue, configure the device after the call fds of all queues are set. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- lib/vhost/vhost_user.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index d5dbd9b..96383b9 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2973,6 +2973,7 @@ static int is_vring_iotlb(struct virtio_net *dev, uint32_t vdpa_type; uint32_t request; uint32_t i; + uint16_t blk_call_fd; dev = get_device(vid); if (dev == NULL) @@ -3183,9 +3184,15 @@ static int is_vring_iotlb(struct virtio_net *dev, goto out; vdpa_type = vdpa_dev->vdpa_device_type; - if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK - && request != VHOST_USER_SET_VRING_CALL) - goto out; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + if (request == VHOST_USER_SET_VRING_CALL) { + blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (blk_call_fd != dev->nr_vring - 1) + goto out; + } else { + goto out; + } + } if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) -- 1.8.3.1
[PATCH v8 00/12] vdpa/ifc: add multi queue support
v8: change "vdpa_device_type" in "rte_vdpa_device" to "type". v7: Fill vdpa_device_type in vdpa device registration. v6: Add vdpa_device_type to rte_vdpa_device to store vDPA device type. v5: fix some commit messages. rework some code logic. v4: fix some commit messages. add some comments to code. fix some code to reduce confusion. v3: rename device ID macro name. fix some patch titles and commit messages. delete some unused macros. rework some code logic. v2: fix some coding style issues. support dynamic enable/disable queue at run time. Andy Pei (10): vdpa/ifc: add multi-queue support vdpa/ifc: set max queues based on virtio spec vdpa/ifc: write queue count to MQ register vdpa/ifc: only configure enabled queue vdpa/ifc: change internal function name vdpa/ifc: add internal API to get device vdpa/ifc: change some driver logic vhost: add type to rte vdpa device vhost: vDPA blk device gets ready when the first queue is ready vhost: improve vDPA blk device configure condition Huang Wei (2): vdpa/ifc: add new device ID for legacy network device vdpa/ifc: support dynamic enable/disable queue drivers/vdpa/ifc/base/ifcvf.c | 144 drivers/vdpa/ifc/base/ifcvf.h | 16 +++- drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++--- lib/vhost/socket.c| 15 +--- lib/vhost/vdpa.c | 15 lib/vhost/vdpa_driver.h | 2 + lib/vhost/vhost_user.c| 38 + 7 files changed, 354 insertions(+), 61 deletions(-) -- 1.8.3.1
[PATCH v8 01/12] vdpa/ifc: add new device ID for legacy network device
From: Huang Wei Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000). Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID". Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/base/ifcvf.h | 6 -- drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 9d95aac..ef7697a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -12,12 +12,14 @@ #define IFCVF_BLK 1 #define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_NET_DEVICE_ID 0x1041 +#define IFCVF_NET_MODERN_DEVICE_ID 0x1041 #define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 #define IFCVF_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_BLK_DEVICE_ID 0x0002 +#define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 +#define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 #define IFCVF_MAX_QUEUES 1 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d5ac583..b4389a0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = { static const struct rte_pci_id pci_id_ifcvf_map[] = { { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, - .device_id = IFCVF_NET_DEVICE_ID, + .device_id = IFCVF_NET_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, + .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID, + .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, + .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID, + }, + + { .class_id = RTE_CLASS_ANY_ID, + .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - 
.subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - .subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .vendor_id = 0, /* sentinel */ -- 1.8.3.1
[PATCH v8 03/12] vdpa/ifc: set max queues based on virtio spec
Set max_queues according to virtio spec. For virtio BLK device, set max_queues to the value of num_queues in struct virtio_blk_config. For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg, get the queue pair number using num_queues and set max_queues to it. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.h | 2 +- drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index d16d9ab..1e133c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -21,7 +21,7 @@ #define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 #define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 -#define IFCVF_MAX_QUEUES 1 +#define IFCVF_MAX_QUEUES 32 #ifndef VIRTIO_F_IOMMU_PLATFORM #define VIRTIO_F_IOMMU_PLATFORM33 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 008cf89..5a24204 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -26,6 +26,12 @@ #include "base/ifcvf.h" +/* + * RTE_MIN() cannot be used since braced-group within expression allowed + * only inside a function. + */ +#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2)) + RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE); #define DRV_LOG(level, fmt, args...) 
\ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ @@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = { uint64_t capacity = 0; uint8_t *byte; uint32_t i; + uint16_t queue_pairs; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->configured = 0; - internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); device_id = ifcvf_pci_get_device_type(pci_dev); @@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->hw.device_type = IFCVF_NET; + /* +* ifc device always has CTRL_VQ, +* and supports VIRTIO_NET_F_CTRL_VQ feature. +*/ + queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2; + DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name, + queue_pairs); + internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs); internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; @@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(DEBUG, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + internal->max_queues = MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v8 02/12] vdpa/ifc: add multi-queue support
Enable VHOST_USER_PROTOCOL_F_MQ feature. Expose IFCVF_MQ_OFFSET register to enable multi-queue. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 9 + drivers/vdpa/ifc/base/ifcvf.h | 2 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index f1e1474..81c68c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -90,6 +90,15 @@ if (!hw->lm_cfg) WARNINGOUT("HW support live migration not support!\n"); + /* For some hardware implementation, for example: +* the BAR 4 of PF is NULL, while BAR 4 of VF is not. +* This code makes sure hw->mq_cfg is a valid address. +*/ + if (hw->mem_resource[4].addr) + hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET; + else + hw->mq_cfg = NULL; + if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { DEBUGOUT("capability incomplete\n"); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index ef7697a..d16d9ab 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_CFG_SIZE 0x40 #define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_MQ_OFFSET0x28 #define IFCVF_LM_LOGGING_CTRL 0x0 @@ -149,6 +150,7 @@ struct ifcvf_hw { u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; u8 *lm_cfg; + u8 *mq_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; int device_type; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index b4389a0..008cf89 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \ 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ +1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) #define 
VDPA_BLK_PROTOCOL_FEATURES \ -- 1.8.3.1
[PATCH v8 04/12] vdpa/ifc: write queue count to MQ register
Write queue count to IFCVF_MQ_OFFSET register to enable multi-queue feature. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 81c68c0..b377126 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -202,6 +202,37 @@ IFCVF_WRITE_REG32(val >> 32, hi); } +STATIC void +ifcvf_enable_mq(struct ifcvf_hw *hw) +{ + u8 *mq_cfg; + u8 qid; + int nr_queue = 0; + + for (qid = 0; qid < hw->nr_vring; qid++) { + if (!hw->vring[qid].enable) + continue; + nr_queue++; + } + + if (nr_queue == 0) { + WARNINGOUT("no enabled vring\n"); + return; + } + + mq_cfg = hw->mq_cfg; + if (mq_cfg) { + if (hw->device_type == IFCVF_BLK) { + *(u32 *)mq_cfg = nr_queue; + RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue); + } else { + *(u32 *)mq_cfg = nr_queue / 2; + RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n", + nr_queue / 2); + } + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { @@ -219,6 +250,7 @@ return -1; } + ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, -- 1.8.3.1
[PATCH v8 05/12] vdpa/ifc: only configure enabled queue
When configuring the hardware queue, we only configure queues which have been enabled by vhost. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index b377126..30bb8cb 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -252,6 +252,9 @@ ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a24204..0c3407a 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -284,6 +284,8 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &hw->req_features); for (i = 0; i < nr_vring; i++) { + if (!hw->vring[i].enable) + continue; rte_vhost_get_vhost_vring(vid, i, &vq); gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); if (gpa == 0) { @@ -499,6 +501,8 @@ struct rte_vdpa_dev_info { vring.kickfd = -1; for (qid = 0; qid < q_num; qid++) { + if (!hw->vring[qid].enable) + continue; ev.events = EPOLLIN | EPOLLPRI; rte_vhost_get_vhost_vring(internal->vid, qid, &vring); ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; @@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info { struct rte_vdpa_device *vdev; struct internal_list *list; struct ifcvf_internal *internal; + struct ifcvf_hw *hw; + uint16_t i; vdev = rte_vhost_get_vdpa_device(vid); list = find_internal_resource_by_vdev(vdev); @@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info { rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); - if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0) - DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", + hw = 
&internal->hw; + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0) + DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", vdev->device->name); + } internal->configured = 1; + DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name); return 0; } -- 1.8.3.1
[PATCH v8 07/12] vdpa/ifc: change internal function name
Change internal function name "find_internal_resource_by_dev" to "find_internal_resource_by_pci_dev". Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 9c49f9c..73d04ed 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -121,7 +121,7 @@ struct rte_vdpa_dev_info { } static struct internal_list * -find_internal_resource_by_dev(struct rte_pci_device *pdev) +find_internal_resource_by_pci_dev(struct rte_pci_device *pdev) { int found = 0; struct internal_list *list; @@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = { if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - list = find_internal_resource_by_dev(pci_dev); + list = find_internal_resource_by_pci_dev(pci_dev); if (list == NULL) { DRV_LOG(ERR, "Invalid device: %s", pci_dev->name); return -1; -- 1.8.3.1
[PATCH v8 06/12] vdpa/ifc: support dynamic enable/disable queue
From: Huang Wei Support dynamically enabling or disabling queues. For a front end such as QEMU, the user can use ethtool to configure the queues. For example, "ethtool -L eth0 combined 3" enables 3 queue pairs. Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 100 ++ drivers/vdpa/ifc/base/ifcvf.h | 6 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 93 --- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 30bb8cb..869ddd6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -233,6 +233,106 @@ } } +int +ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u8 *lm_cfg; + u16 notify_off; + int msix_vector; + + if (i >= (int)hw->nr_vring) + return -1; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return -1; + } + + ifcvf_enable_mq(hw); + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector != (i + 1)) { + IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector == IFCVF_MSI_NO_VECTOR) { + RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n", + i); + return -1; + } + } + + io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + 
(u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } + + notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off); + hw->notify_addr[i] = (void *)((u8 *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + IFCVF_WRITE_REG16(1, &cfg->queue_enable); + + return 0; +} + +void +ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u32 ring_state; + u8 *lm_cfg; + + if (i >= (int)hw->nr_vring) + return; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return; + } + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + IFCVF_WRITE_REG16(0, &cfg->queue_enable); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + } else { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + } + hw->vring[i].last_used_idx = (u16)(ring_state >> 16); + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 1e133c0..3726da7 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -164,6 +164,12 @@ struct ifcvf_hw { ifcvf_get_features(struct ifcvf_hw *hw); int +ifcvf_en
[PATCH v8 08/12] vdpa/ifc: add internal API to get device
Add new internal API "find_internal_resource_by_rte_dev" to get device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 73d04ed..c16e263 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -144,6 +144,29 @@ struct rte_vdpa_dev_info { return list; } +static struct internal_list * +find_internal_resource_by_rte_dev(struct rte_device *rte_dev) +{ + int found = 0; + struct internal_list *list; + + pthread_mutex_lock(&internal_list_lock); + + TAILQ_FOREACH(list, &internal_list, next) { + if (rte_dev == &list->internal->pdev->device) { + found = 1; + break; + } + } + + pthread_mutex_unlock(&internal_list_lock); + + if (!found) + return NULL; + + return list; +} + static int ifcvf_vfio_setup(struct ifcvf_internal *internal) { @@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info { { struct ifcvf_internal *internal; struct internal_list *list; + struct rte_device *rte_dev = vdev->device; - list = find_internal_resource_by_vdev(vdev); + list = find_internal_resource_by_rte_dev(rte_dev); if (list == NULL) { - DRV_LOG(ERR, "Invalid vDPA device: %p", vdev); + DRV_LOG(ERR, "Invalid rte device: %p", rte_dev); return -1; } -- 1.8.3.1
[PATCH v8 09/12] vdpa/ifc: change some driver logic
Insert the internal list element into the internal list before registering the vdpa device, so that vdpa ops can be called during vdpa device registration. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index c16e263..8dfd493 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->sw_lm = sw_fallback_lm; + pthread_mutex_lock(&internal_list_lock); + TAILQ_INSERT_TAIL(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + internal->vdev = rte_vdpa_register_device(&pci_dev->device, dev_info[internal->hw.device_type].ops); if (internal->vdev == NULL) { DRV_LOG(ERR, "failed to register device %s", pci_dev->name); + pthread_mutex_lock(&internal_list_lock); + TAILQ_REMOVE(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); goto error; } - pthread_mutex_lock(&internal_list_lock); - TAILQ_INSERT_TAIL(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - rte_atomic32_set(&internal->started, 1); update_datapath(internal); -- 1.8.3.1
[PATCH v8 10/12] vhost: add type to rte vdpa device
Add a type field to rte_vdpa_device to store the device type. Call the vdpa op get_dev_type to fill in the type when registering the vdpa device. Signed-off-by: Andy Pei --- lib/vhost/socket.c | 15 +-- lib/vhost/vdpa.c| 15 +++ lib/vhost/vdpa_driver.h | 2 ++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index 608ae57..863a6f6 100644 --- a/lib/vhost/socket.c +++ b/lib/vhost/socket.c @@ -627,7 +627,6 @@ struct rte_vdpa_device * { struct vhost_user_socket *vsocket; struct rte_vdpa_device *vdpa_dev; - uint32_t vdpa_type = 0; int ret = 0; pthread_mutex_lock(&vhost_user.mutex); @@ -644,19 +643,7 @@ struct rte_vdpa_device * goto unlock_exit; } - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(path, ERR, - "failed to get vdpa dev type for socket file.\n"); - ret = -1; - goto unlock_exit; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } - - *type = vdpa_type; + *type = vdpa_dev->type; unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c index bb82857..577cb00 100644 --- a/lib/vhost/vdpa.c +++ b/lib/vhost/vdpa.c @@ -73,6 +73,7 @@ struct rte_vdpa_device * struct rte_vdpa_dev_ops *ops) { struct rte_vdpa_device *dev; + int ret = 0; if (ops == NULL) return NULL; @@ -101,6 +102,20 @@ struct rte_vdpa_device * dev->device = rte_dev; dev->ops = ops; + + if (ops->get_dev_type) { + ret = ops->get_dev_type(dev, &dev->type); + if (ret) { + VHOST_LOG_CONFIG(rte_dev->name, ERR, +"Failed to get vdpa dev type.\n"); + ret = -1; + goto out_unlock; + } + } else { + /** by default, we assume vdpa device is a net device */ + dev->type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next); out_unlock: rte_spinlock_unlock(&vdpa_device_list_lock); diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index 8b88a53..8db4ab9 100644 --- a/lib/vhost/vdpa_driver.h +++ 
b/lib/vhost/vdpa_driver.h @@ -92,6 +92,8 @@ struct rte_vdpa_device { struct rte_device *device; /** vdpa device operations */ struct rte_vdpa_dev_ops *ops; + /** vdpa device type: net, blk... */ + uint32_t type; }; /** -- 1.8.3.1
[PATCH v8 11/12] vhost: vDPA blk device gets ready when the first queue is ready
When booting from a virtio blk device, SeaBIOS in QEMU only enables one queue. To work in this scenario, the vDPA BLK device back-end configures the device when the first queue is ready. Signed-off-by: Andy Pei Signed-off-by: Huang Wei --- lib/vhost/vhost_user.c | 33 + 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index cd65257..e0ff79d 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -1441,11 +1441,14 @@ } #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u +#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u static int virtio_is_ready(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; struct vhost_virtqueue *vq; + uint32_t vdpa_type; uint32_t i, nr_vring = dev->nr_vring; if (dev->flags & VIRTIO_DEV_READY) @@ -1454,13 +1457,22 @@ if (!dev->nr_vring) return 0; - if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { - nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) + vdpa_type = vdpa_dev->type; + else + vdpa_type = -1; - if (dev->nr_vring < nr_vring) - return 0; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY; + } else { + if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) + nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; } + if (dev->nr_vring < nr_vring) + return 0; + for (i = 0; i < nr_vring; i++) { vq = dev->virtqueue[i]; @@ -2958,7 +2970,6 @@ static int is_vring_iotlb(struct virtio_net *dev, int ret; int unlock_required = 0; bool handled; - uint32_t vdpa_type = 0; uint32_t request; uint32_t i; @@ -3170,17 +3181,7 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa dev type.\n"); - ret = -1; - goto out; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } - if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK + if 
(vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK && request != VHOST_USER_SET_VRING_CALL) goto out; -- 1.8.3.1
[PATCH v8 12/12] vhost: improve vDPA blk device configure condition
To support multi-queue, configure the device after the call fd of every queue is set. Signed-off-by: Andy Pei Signed-off-by: Huang Wei --- lib/vhost/vhost_user.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index e0ff79d..9902ae9 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2972,6 +2972,7 @@ static int is_vring_iotlb(struct virtio_net *dev, bool handled; uint32_t request; uint32_t i; + uint16_t blk_call_fd; dev = get_device(vid); if (dev == NULL) @@ -3181,9 +3182,15 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK - && request != VHOST_USER_SET_VRING_CALL) - goto out; + if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + if (request == VHOST_USER_SET_VRING_CALL) { + blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (blk_call_fd != dev->nr_vring - 1) + goto out; + } else { + goto out; + } + } if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) -- 1.8.3.1
[PATCH v9 01/12] vdpa/ifc: add new device ID for legacy network device
From: Huang Wei Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000). Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID". Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/base/ifcvf.h | 6 -- drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 9d95aac..ef7697a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -12,12 +12,14 @@ #define IFCVF_BLK 1 #define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_NET_DEVICE_ID 0x1041 +#define IFCVF_NET_MODERN_DEVICE_ID 0x1041 #define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 #define IFCVF_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_BLK_DEVICE_ID 0x0002 +#define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 +#define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 #define IFCVF_MAX_QUEUES 1 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d5ac583..b4389a0 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = { static const struct rte_pci_id pci_id_ifcvf_map[] = { { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, - .device_id = IFCVF_NET_DEVICE_ID, + .device_id = IFCVF_NET_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, + .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID, + .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, + .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID, + }, + + { .class_id = RTE_CLASS_ANY_ID, + .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - 
.subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .class_id = RTE_CLASS_ANY_ID, .vendor_id = IFCVF_VENDOR_ID, .device_id = IFCVF_BLK_MODERN_DEVICE_ID, .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID, - .subsystem_device_id = IFCVF_BLK_DEVICE_ID, + .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID, }, { .vendor_id = 0, /* sentinel */ -- 1.8.3.1
[PATCH v9 00/12] vdpa/ifc: add multi queue support
v9: fix some commit message. v8: change "vdpa_device_type" in "rte_vdpa_device" to "type". v7: Fill vdpa_device_type in vdpa device registration. v6: Add vdpa_device_type to rte_vdpa_device to store vDPA device type. v5: fix some commit message. rework some code logic. v4: fix some commit message. add some comments to code. fix some code to reduce confusion. v3: rename device ID macro name. fix some patch title and commit message. delete some unused macros. rework some code logic. v2: fix some coding style issue. support dynamic enable/disable queue at run time. Andy Pei (10): vdpa/ifc: add multi-queue support vdpa/ifc: set max queues based on virtio spec vdpa/ifc: write queue count to MQ register vdpa/ifc: only configure enabled queue vdpa/ifc: change internal function name vdpa/ifc: add internal API to get device vdpa/ifc: improve internal list logic vhost: add type to rte vdpa device vhost: vDPA blk device gets ready when the first queue is ready vhost: improve vDPA blk device configure condition Huang Wei (2): vdpa/ifc: add new device ID for legacy network device vdpa/ifc: support dynamic enable/disable queue drivers/vdpa/ifc/base/ifcvf.c | 144 drivers/vdpa/ifc/base/ifcvf.h | 16 +++- drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++--- lib/vhost/socket.c| 15 +--- lib/vhost/vdpa.c | 15 lib/vhost/vdpa_driver.h | 2 + lib/vhost/vhost_user.c| 38 + 7 files changed, 354 insertions(+), 61 deletions(-) -- 1.8.3.1
[PATCH v9 02/12] vdpa/ifc: add multi-queue support
Enable VHOST_USER_PROTOCOL_F_MQ feature. Expose IFCVF_MQ_OFFSET register to enable multi-queue. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 9 + drivers/vdpa/ifc/base/ifcvf.h | 2 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index f1e1474..81c68c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -90,6 +90,15 @@ if (!hw->lm_cfg) WARNINGOUT("HW support live migration not support!\n"); + /* For some hardware implementation, for example: +* the BAR 4 of PF is NULL, while BAR 4 of VF is not. +* This code makes sure hw->mq_cfg is a valid address. +*/ + if (hw->mem_resource[4].addr) + hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET; + else + hw->mq_cfg = NULL; + if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { DEBUGOUT("capability incomplete\n"); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index ef7697a..d16d9ab 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_CFG_SIZE 0x40 #define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_MQ_OFFSET0x28 #define IFCVF_LM_LOGGING_CTRL 0x0 @@ -149,6 +150,7 @@ struct ifcvf_hw { u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; u8 *lm_cfg; + u8 *mq_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; int device_type; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index b4389a0..008cf89 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \ 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ +1ULL << VHOST_USER_PROTOCOL_F_MQ | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) #define 
VDPA_BLK_PROTOCOL_FEATURES \ -- 1.8.3.1
[PATCH v9 03/12] vdpa/ifc: set max queues based on virtio spec
Set max_queues according to virtio spec. For virtio BLK device, set max_queues to the value of num_queues in struct virtio_blk_config. For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg, get the queue pair number using num_queues and set max_queues to it. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.h | 2 +- drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index d16d9ab..1e133c0 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -21,7 +21,7 @@ #define IFCVF_SUBSYS_NET_DEVICE_ID 0x0001 #define IFCVF_SUBSYS_BLK_DEVICE_ID 0x0002 -#define IFCVF_MAX_QUEUES 1 +#define IFCVF_MAX_QUEUES 32 #ifndef VIRTIO_F_IOMMU_PLATFORM #define VIRTIO_F_IOMMU_PLATFORM33 diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 008cf89..5a24204 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -26,6 +26,12 @@ #include "base/ifcvf.h" +/* + * RTE_MIN() cannot be used since braced-group within expression allowed + * only inside a function. + */ +#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2)) + RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE); #define DRV_LOG(level, fmt, args...) 
\ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ @@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = { uint64_t capacity = 0; uint8_t *byte; uint32_t i; + uint16_t queue_pairs; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->configured = 0; - internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); device_id = ifcvf_pci_get_device_type(pci_dev); @@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->hw.device_type = IFCVF_NET; + /* +* ifc device always has CTRL_VQ, +* and supports VIRTIO_NET_F_CTRL_VQ feature. +*/ + queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2; + DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name, + queue_pairs); + internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs); internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; @@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(DEBUG, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + internal->max_queues = MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v9 06/12] vdpa/ifc: support dynamic enable/disable queue
From: Huang Wei Support dynamic enable or disable queue. For front end, like QEMU, user can use ethtool to configure queue. For example, "ethtool -L eth0 combined 3" to enable 3 queues pairs. Signed-off-by: Huang Wei Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 100 ++ drivers/vdpa/ifc/base/ifcvf.h | 6 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 93 --- 3 files changed, 184 insertions(+), 15 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 30bb8cb..869ddd6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -233,6 +233,106 @@ } } +int +ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u8 *lm_cfg; + u16 notify_off; + int msix_vector; + + if (i >= (int)hw->nr_vring) + return -1; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return -1; + } + + ifcvf_enable_mq(hw); + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector != (i + 1)) { + IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); + msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector); + if (msix_vector == IFCVF_MSI_NO_VECTOR) { + RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n", + i); + return -1; + } + } + + io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + 
(u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } + + notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off); + hw->notify_addr[i] = (void *)((u8 *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + IFCVF_WRITE_REG16(1, &cfg->queue_enable); + + return 0; +} + +void +ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i) +{ + struct ifcvf_pci_common_cfg *cfg; + u32 ring_state; + u8 *lm_cfg; + + if (i >= (int)hw->nr_vring) + return; + + cfg = hw->common_cfg; + if (!cfg) { + RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n"); + return; + } + + IFCVF_WRITE_REG16(i, &cfg->queue_select); + IFCVF_WRITE_REG16(0, &cfg->queue_enable); + + lm_cfg = hw->lm_cfg; + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + } else { + ring_state = *(u32 *)(lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + } + hw->vring[i].last_used_idx = (u16)(ring_state >> 16); + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 1e133c0..3726da7 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -164,6 +164,12 @@ struct ifcvf_hw { ifcvf_get_features(struct ifcvf_hw *hw); int +ifcvf_en
[PATCH v9 04/12] vdpa/ifc: write queue count to MQ register
Write queue count to IFCVF_MQ_OFFSET register to enable multi-queue feature. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 32 1 file changed, 32 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 81c68c0..b377126 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -202,6 +202,37 @@ IFCVF_WRITE_REG32(val >> 32, hi); } +STATIC void +ifcvf_enable_mq(struct ifcvf_hw *hw) +{ + u8 *mq_cfg; + u8 qid; + int nr_queue = 0; + + for (qid = 0; qid < hw->nr_vring; qid++) { + if (!hw->vring[qid].enable) + continue; + nr_queue++; + } + + if (nr_queue == 0) { + WARNINGOUT("no enabled vring\n"); + return; + } + + mq_cfg = hw->mq_cfg; + if (mq_cfg) { + if (hw->device_type == IFCVF_BLK) { + *(u32 *)mq_cfg = nr_queue; + RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue); + } else { + *(u32 *)mq_cfg = nr_queue / 2; + RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n", + nr_queue / 2); + } + } +} + STATIC int ifcvf_hw_enable(struct ifcvf_hw *hw) { @@ -219,6 +250,7 @@ return -1; } + ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, -- 1.8.3.1
[PATCH v9 05/12] vdpa/ifc: only configure enabled queue
When configuring the hardware queue, we only configure queues which have been enabled by vhost. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia --- drivers/vdpa/ifc/base/ifcvf.c | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index b377126..30bb8cb 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -252,6 +252,9 @@ ifcvf_enable_mq(hw); for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + IFCVF_WRITE_REG16(i, &cfg->queue_select); io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 5a24204..0c3407a 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -284,6 +284,8 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &hw->req_features); for (i = 0; i < nr_vring; i++) { + if (!hw->vring[i].enable) + continue; rte_vhost_get_vhost_vring(vid, i, &vq); gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); if (gpa == 0) { @@ -499,6 +501,8 @@ struct rte_vdpa_dev_info { vring.kickfd = -1; for (qid = 0; qid < q_num; qid++) { + if (!hw->vring[qid].enable) + continue; ev.events = EPOLLIN | EPOLLPRI; rte_vhost_get_vhost_vring(internal->vid, qid, &vring); ev.data.u64 = qid | (uint64_t)vring.kickfd << 32; @@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info { struct rte_vdpa_device *vdev; struct internal_list *list; struct ifcvf_internal *internal; + struct ifcvf_hw *hw; + uint16_t i; vdev = rte_vhost_get_vdpa_device(vid); list = find_internal_resource_by_vdev(vdev); @@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info { rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); - if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0) - DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", + hw = 
&internal->hw; + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].enable) + continue; + if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0) + DRV_LOG(NOTICE, "vDPA (%s): software relay is used.", vdev->device->name); + } internal->configured = 1; + DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name); return 0; } -- 1.8.3.1
[PATCH v9 07/12] vdpa/ifc: change internal function name
Change internal function name "find_internal_resource_by_dev" to "find_internal_resource_by_pci_dev". Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 9c49f9c..73d04ed 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -121,7 +121,7 @@ struct rte_vdpa_dev_info { } static struct internal_list * -find_internal_resource_by_dev(struct rte_pci_device *pdev) +find_internal_resource_by_pci_dev(struct rte_pci_device *pdev) { int found = 0; struct internal_list *list; @@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = { if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - list = find_internal_resource_by_dev(pci_dev); + list = find_internal_resource_by_pci_dev(pci_dev); if (list == NULL) { DRV_LOG(ERR, "Invalid device: %s", pci_dev->name); return -1; -- 1.8.3.1
[PATCH v9 08/12] vdpa/ifc: add internal API to get device
Add new internal API "find_internal_resource_by_rte_dev" to get device. Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 73d04ed..c16e263 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -144,6 +144,29 @@ struct rte_vdpa_dev_info { return list; } +static struct internal_list * +find_internal_resource_by_rte_dev(struct rte_device *rte_dev) +{ + int found = 0; + struct internal_list *list; + + pthread_mutex_lock(&internal_list_lock); + + TAILQ_FOREACH(list, &internal_list, next) { + if (rte_dev == &list->internal->pdev->device) { + found = 1; + break; + } + } + + pthread_mutex_unlock(&internal_list_lock); + + if (!found) + return NULL; + + return list; +} + static int ifcvf_vfio_setup(struct ifcvf_internal *internal) { @@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info { { struct ifcvf_internal *internal; struct internal_list *list; + struct rte_device *rte_dev = vdev->device; - list = find_internal_resource_by_vdev(vdev); + list = find_internal_resource_by_rte_dev(rte_dev); if (list == NULL) { - DRV_LOG(ERR, "Invalid vDPA device: %p", vdev); + DRV_LOG(ERR, "Invalid rte device: %p", rte_dev); return -1; } -- 1.8.3.1
[PATCH v9 09/12] vdpa/ifc: improve internal list logic
Insert the internal list element into the internal list before registering the vdpa device, so that vdpa ops can be called during vdpa device registration. Signed-off-by: Andy Pei Reviewed-by: Maxime Coquelin --- drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index c16e263..8dfd493 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = { } internal->sw_lm = sw_fallback_lm; + pthread_mutex_lock(&internal_list_lock); + TAILQ_INSERT_TAIL(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + internal->vdev = rte_vdpa_register_device(&pci_dev->device, dev_info[internal->hw.device_type].ops); if (internal->vdev == NULL) { DRV_LOG(ERR, "failed to register device %s", pci_dev->name); + pthread_mutex_lock(&internal_list_lock); + TAILQ_REMOVE(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); goto error; } - pthread_mutex_lock(&internal_list_lock); - TAILQ_INSERT_TAIL(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - rte_atomic32_set(&internal->started, 1); update_datapath(internal); -- 1.8.3.1
[PATCH v9 10/12] vhost: add type to rte vdpa device
Add type to rte_vdpa_device to store device type. Call vdpa ops get_dev_type to fill type when register vdpa device. Signed-off-by: Andy Pei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- lib/vhost/socket.c | 15 +-- lib/vhost/vdpa.c| 15 +++ lib/vhost/vdpa_driver.h | 2 ++ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c index 608ae57..863a6f6 100644 --- a/lib/vhost/socket.c +++ b/lib/vhost/socket.c @@ -627,7 +627,6 @@ struct rte_vdpa_device * { struct vhost_user_socket *vsocket; struct rte_vdpa_device *vdpa_dev; - uint32_t vdpa_type = 0; int ret = 0; pthread_mutex_lock(&vhost_user.mutex); @@ -644,19 +643,7 @@ struct rte_vdpa_device * goto unlock_exit; } - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(path, ERR, - "failed to get vdpa dev type for socket file.\n"); - ret = -1; - goto unlock_exit; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } - - *type = vdpa_type; + *type = vdpa_dev->type; unlock_exit: pthread_mutex_unlock(&vhost_user.mutex); diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c index bb82857..577cb00 100644 --- a/lib/vhost/vdpa.c +++ b/lib/vhost/vdpa.c @@ -73,6 +73,7 @@ struct rte_vdpa_device * struct rte_vdpa_dev_ops *ops) { struct rte_vdpa_device *dev; + int ret = 0; if (ops == NULL) return NULL; @@ -101,6 +102,20 @@ struct rte_vdpa_device * dev->device = rte_dev; dev->ops = ops; + + if (ops->get_dev_type) { + ret = ops->get_dev_type(dev, &dev->type); + if (ret) { + VHOST_LOG_CONFIG(rte_dev->name, ERR, +"Failed to get vdpa dev type.\n"); + ret = -1; + goto out_unlock; + } + } else { + /** by default, we assume vdpa device is a net device */ + dev->type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next); out_unlock: rte_spinlock_unlock(&vdpa_device_list_lock); diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index 8b88a53..8db4ab9 100644 --- 
a/lib/vhost/vdpa_driver.h +++ b/lib/vhost/vdpa_driver.h @@ -92,6 +92,8 @@ struct rte_vdpa_device { struct rte_device *device; /** vdpa device operations */ struct rte_vdpa_dev_ops *ops; + /** vdpa device type: net, blk... */ + uint32_t type; }; /** -- 1.8.3.1
[PATCH v9 11/12] vhost: vDPA blk device gets ready when the first queue is ready
When boot from virtio blk device, seabios in QEMU only enables one queue. To work in this scenario, vDPA BLK device back-end configure device when the first queue is ready. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- lib/vhost/vhost_user.c | 33 + 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index cd65257..e0ff79d 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -1441,11 +1441,14 @@ } #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u +#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u static int virtio_is_ready(struct virtio_net *dev) { + struct rte_vdpa_device *vdpa_dev; struct vhost_virtqueue *vq; + uint32_t vdpa_type; uint32_t i, nr_vring = dev->nr_vring; if (dev->flags & VIRTIO_DEV_READY) @@ -1454,13 +1457,22 @@ if (!dev->nr_vring) return 0; - if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) { - nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; + vdpa_dev = dev->vdpa_dev; + if (vdpa_dev) + vdpa_type = vdpa_dev->type; + else + vdpa_type = -1; - if (dev->nr_vring < nr_vring) - return 0; + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY; + } else { + if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) + nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY; } + if (dev->nr_vring < nr_vring) + return 0; + for (i = 0; i < nr_vring; i++) { vq = dev->virtqueue[i]; @@ -2958,7 +2970,6 @@ static int is_vring_iotlb(struct virtio_net *dev, int ret; int unlock_required = 0; bool handled; - uint32_t vdpa_type = 0; uint32_t request; uint32_t i; @@ -3170,17 +3181,7 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_dev->ops->get_dev_type) { - ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); - if (ret) { - VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa dev type.\n"); - ret = -1; - goto out; - } - } else { - vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; - } - if 
(vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK + if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK && request != VHOST_USER_SET_VRING_CALL) goto out; -- 1.8.3.1
[PATCH v9 12/12] vhost: improve vDPA blk device configure condition
To support multi-queue, configure the device only after the call fds of all queues have been set. Signed-off-by: Andy Pei Signed-off-by: Huang Wei Reviewed-by: Chenbo Xia Reviewed-by: Maxime Coquelin --- lib/vhost/vhost_user.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index e0ff79d..9902ae9 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2972,6 +2972,7 @@ static int is_vring_iotlb(struct virtio_net *dev, bool handled; uint32_t request; uint32_t i; + uint16_t blk_call_fd; dev = get_device(vid); if (dev == NULL) @@ -3181,9 +3182,15 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; - if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK - && request != VHOST_USER_SET_VRING_CALL) - goto out; + if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) { + if (request == VHOST_USER_SET_VRING_CALL) { + blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (blk_call_fd != dev->nr_vring - 1) + goto out; + } else { + goto out; + } + } if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) -- 1.8.3.1
[PATCH] vdpa/ifc: fix null pointer dereference
Fix null pointer dereference reported in coverity scan. Coverity issue: 378882 Fixes: 8162a4a9 ("vdpa/ifc/base: access correct register for blk device") Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index dd475a7..0a9f71a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -255,6 +255,10 @@ u32 ring_state; cfg = hw->common_cfg; + if (!cfg) { + DEBUGOUT("common_cfg in HW is NULL.\n"); + return; + } IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config); for (i = 0; i < hw->nr_vring; i++) { @@ -262,6 +266,11 @@ IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); + if (!hw->lm_cfg) { + DEBUGOUT("live migration cfg in HW is NULL.\n"); + continue; + } + if (hw->device_type == IFCVF_BLK) ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + -- 1.8.3.1
[PATCH v2] vdpa/ifc: fix null pointer dereference
Fix null pointer dereference reported in coverity scan. Coverity issue: 378882 Fixes: 5d75517beffe ("vdpa/ifc/base: access correct register for blk device") Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index dd475a7..0a9f71a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -255,6 +255,10 @@ u32 ring_state; cfg = hw->common_cfg; + if (!cfg) { + DEBUGOUT("common_cfg in HW is NULL.\n"); + return; + } IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config); for (i = 0; i < hw->nr_vring; i++) { @@ -262,6 +266,11 @@ IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); + if (!hw->lm_cfg) { + DEBUGOUT("live migration cfg in HW is NULL.\n"); + continue; + } + if (hw->device_type == IFCVF_BLK) ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + -- 1.8.3.1
[PATCH v2] vdpa/ifc: fix null pointer dereference
Fix null pointer dereference reported in coverity scan. Coverity issue: 378882 Fixes: 5d75517beffe ("vdpa/ifc/base: access correct register for blk device") Signed-off-by: Andy Pei Acked-by: Xiao Wang --- drivers/vdpa/ifc/base/ifcvf.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index dd475a7..0a9f71a 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -255,6 +255,10 @@ u32 ring_state; cfg = hw->common_cfg; + if (!cfg) { + DEBUGOUT("common_cfg in HW is NULL.\n"); + return; + } IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config); for (i = 0; i < hw->nr_vring; i++) { @@ -262,6 +266,11 @@ IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); + if (!hw->lm_cfg) { + DEBUGOUT("live migration cfg in HW is NULL.\n"); + continue; + } + if (hw->device_type == IFCVF_BLK) ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + -- 1.8.3.1
[PATCH] vdpa/ifc: fix vhost message size check issue
For the vhost message VHOST_USER_GET_CONFIG, the payload size is not checked in the vhost lib; it is checked in the driver-specific ops. For the ifc vdpa driver, we only need to make sure the payload size is not smaller than sizeof(struct virtio_blk_config). Fixes: 856d03bcdc54 ("vdpa/ifc: add block operations") Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 8bc971c..ac42de9 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1400,7 +1400,7 @@ struct rte_vdpa_dev_info { uint64_t capacity = 0; uint8_t *byte; - if (size != sizeof(struct virtio_blk_config)) { + if (size < sizeof(struct virtio_blk_config)) { DRV_LOG(ERR, "Invalid len: %u, required: %u", size, (uint32_t)sizeof(struct virtio_blk_config)); return -1; -- 1.8.3.1
[PATCH] vhost: fix virtio blk vDPA live migration IO drop
In the virtio blk vDPA live migration use case, before the live migration process, QEMU sets the call fd on the vDPA back-end. QEMU and the vDPA back-end stand by until live migration starts. During the live migration process, QEMU sets the kick fd and a new call fd. However, once the kick fd is set on the vDPA back-end, the vDPA back-end configures the device and the data path starts. The new call fd then triggers a "re-configuration", and this "re-configuration" causes IO drops. With this patch, the vDPA back-end configures the device only after both the kick fd and the call fd are set, which ensures no IO is dropped. This patch only impacts virtio blk vDPA devices and does not impact net devices. Fixes: 7015b6577178 ("vdpa/ifc: add block device SW live-migration") Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 2b9a3b6..cc03f67 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -2946,6 +2946,7 @@ static int is_vring_iotlb(struct virtio_net *dev, int ret; int unlock_required = 0; bool handled; + uint32_t vdpa_type = 0; uint32_t request; uint32_t i; @@ -3152,6 +3153,20 @@ static int is_vring_iotlb(struct virtio_net *dev, if (!vdpa_dev) goto out; + if (vdpa_dev->ops->get_dev_type) { + ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type); + if (ret) { + VHOST_LOG_CONFIG(ERR, "failed to get vdpa dev type.\n"); + ret = -1; + goto out; + } + } else { + vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET; + } + if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK + && request != VHOST_USER_SET_VRING_CALL) + goto out; + if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n", -- 1.8.3.1
[PATCH] vdpa/ifc/base: fix null pointer dereference
Fix null pointer dereference reported in coverity scan. Output some log information when lm_cfg is null. Make lm_cfg is not null before operate on lm_cfg. Coverity issue: 378882 Fixes: d7fe5a2861e7 ("net/ifc: support live migration") Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 31 --- drivers/vdpa/ifc/base/ifcvf_osdep.h | 1 + 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 0a9f71a..f1e1474 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -87,6 +87,8 @@ } hw->lm_cfg = hw->mem_resource[4].addr; + if (!hw->lm_cfg) + WARNINGOUT("HW support live migration not support!\n"); if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { @@ -218,17 +220,19 @@ &cfg->queue_used_hi); IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); - if (hw->device_type == IFCVF_BLK) - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - i * IFCVF_LM_CFG_SIZE) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); - else - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + - (i % 2) * 4) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -320,6 +324,8 @@ u8 *lm_cfg; lm_cfg = hw->lm_cfg; + if (!lm_cfg) + return; *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) = log_base & IFCVF_32_BIT_MASK; @@ -342,6 +348,9 @@ u8 *lm_cfg; lm_cfg = hw->lm_cfg; + if (!lm_cfg) 
+ return; + *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE; } diff --git a/drivers/vdpa/ifc/base/ifcvf_osdep.h b/drivers/vdpa/ifc/base/ifcvf_osdep.h index 6aef25e..8a47fcb 100644 --- a/drivers/vdpa/ifc/base/ifcvf_osdep.h +++ b/drivers/vdpa/ifc/base/ifcvf_osdep.h @@ -14,6 +14,7 @@ #include #include +#define WARNINGOUT(S, args...)RTE_LOG(WARNING, PMD, S, ##args) #define DEBUGOUT(S, args...)RTE_LOG(DEBUG, PMD, S, ##args) #define STATIC static -- 1.8.3.1
[PATCH v2] vdpa/ifc/base: fix null pointer dereference
Fix null pointer dereference reported in coverity scan. Output some log information when lm_cfg is null. Make sure lm_cfg is not null before operate on lm_cfg. Coverity issue: 378882 Fixes: d7fe5a2861e7 ("net/ifc: support live migration") Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 31 --- drivers/vdpa/ifc/base/ifcvf_osdep.h | 1 + 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 0a9f71a..f1e1474 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -87,6 +87,8 @@ } hw->lm_cfg = hw->mem_resource[4].addr; + if (!hw->lm_cfg) + WARNINGOUT("HW support live migration not support!\n"); if (hw->common_cfg == NULL || hw->notify_base == NULL || hw->isr == NULL || hw->dev_cfg == NULL) { @@ -218,17 +220,19 @@ &cfg->queue_used_hi); IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); - if (hw->device_type == IFCVF_BLK) - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - i * IFCVF_LM_CFG_SIZE) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); - else - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + - (i % 2) * 4) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); + if (lm_cfg) { + if (hw->device_type == IFCVF_BLK) + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + else + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -320,6 +324,8 @@ u8 *lm_cfg; lm_cfg = hw->lm_cfg; + if (!lm_cfg) + return; *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) = log_base & IFCVF_32_BIT_MASK; @@ -342,6 +348,9 @@ u8 *lm_cfg; lm_cfg = hw->lm_cfg; + if 
(!lm_cfg) + return; + *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE; } diff --git a/drivers/vdpa/ifc/base/ifcvf_osdep.h b/drivers/vdpa/ifc/base/ifcvf_osdep.h index 6aef25e..8a47fcb 100644 --- a/drivers/vdpa/ifc/base/ifcvf_osdep.h +++ b/drivers/vdpa/ifc/base/ifcvf_osdep.h @@ -14,6 +14,7 @@ #include #include +#define WARNINGOUT(S, args...)RTE_LOG(WARNING, PMD, S, ##args) #define DEBUGOUT(S, args...)RTE_LOG(DEBUG, PMD, S, ##args) #define STATIC static -- 1.8.3.1
[PATCH] vdpa/ifc: fix log info mismatch
Fix log info mismatch. Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver") Cc: sta...@dpdk.org Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..d10c1fd 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -94,12 +94,14 @@ return -1; } - DEBUGOUT("capability mapping:\ncommon cfg: %p\n" - "notify base: %p\nisr cfg: %p\ndevice cfg: %p\n" - "multiplier: %u\n", - hw->common_cfg, hw->dev_cfg, - hw->isr, hw->notify_base, - hw->notify_off_multiplier); + DEBUGOUT("capability mapping:\n" +"common cfg: %p\n" +"notify base: %p\n" +"isr cfg: %p\n" +"device cfg: %p\n" +"multiplier: %u\n", +hw->common_cfg, hw->notify_base, hw->isr, hw->dev_cfg, +hw->notify_off_multiplier); return 0; } -- 1.8.3.1
[PATCH] vhost: add some log for vhost message VHOST_USER_SET_VRING_BASE
Usually the last avail index and last used index are 0, but for the target device of a live migration, the last avail index and last used index are not 0, so logging them is helpful. Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 5 + 1 file changed, 5 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index a781346..3cb13fb 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -973,6 +973,11 @@ vq->last_avail_idx = msg->payload.state.num; } + VHOST_LOG_CONFIG(INFO, + "vring base idx:%d last_used_idx:%u last_avail_idx:%u.\n", + msg->payload.state.index, vq->last_used_idx, + vq->last_avail_idx); + return RTE_VHOST_MSG_RESULT_OK; } -- 1.8.3.1
[PATCH v2] vhost: add log for VHOST_USER_SET_VRING_BASE
This patch adds log for vring related info in handling of vhost message VHOST_USER_SET_VRING_BASE, which will be useful in live migration case. Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 5 + 1 file changed, 5 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index a781346..cd8c7bc 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -973,6 +973,11 @@ vq->last_avail_idx = msg->payload.state.num; } + VHOST_LOG_CONFIG(INFO, + "vring base idx:%u last_used_idx:%u last_avail_idx:%u.\n", + msg->payload.state.index, vq->last_used_idx, + vq->last_avail_idx); + return RTE_VHOST_MSG_RESULT_OK; } -- 1.8.3.1
[PATCH 00/15] add virtio_blk device support to vdpa/ifc
This patch set adds virtio_blk device support to the vdpa/ifc driver. Because the net and blk devices have a lot in common, part of the existing vdpa/ifc driver is re-used. The virtio net and blk devices are distinguished by device id, and device-specific features and ops are implemented for each. An example is added to the vdpa example application to support the virtio_blk device. To support blk device live migration, the vhost lib is modified: the dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message. Andy Pei (15): vdpa/ifc: add support for virtio blk device vhost: add vdpa ops for blk device vdpa/ifc: add blk ops for ifc device vdpa/ifc: add vdpa interrupt for blk device vdpa/ifc: add blk dev sw live migration example/vdpa:add vdpa blk support in example usertools: add support for virtio blk device vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device vdpa/ifc: add some log at VDPA lauch before qemu connect vdpa/ifc: read virtio max_queues from hardware vdpa: add config space change interrupt register and handle for virtio_blk vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe vdpa/ifc/base: for blk device, live migration register is different from net device vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause vhost: make sure each queue callfd is configured drivers/vdpa/ifc/base/ifcvf.c| 42 ++- drivers/vdpa/ifc/base/ifcvf.h| 29 ++- drivers/vdpa/ifc/ifcvf_vdpa.c| 534 --- examples/vdpa/Makefile | 2 +- examples/vdpa/main.c | 8 + examples/vdpa/meson.build| 1 + examples/vdpa/vdpa_blk_compact.c | 152 +++ examples/vdpa/vdpa_blk_compact.h | 118 + examples/vdpa/vhost_user.h | 190 ++ lib/vhost/vdpa_driver.h | 8 +- lib/vhost/vhost_user.c | 15 ++ usertools/dpdk-devbind.py| 8 + 12 files changed, 1053 insertions(+), 54 deletions(-) create mode 100644 examples/vdpa/vdpa_blk_compact.c create mode 100644 examples/vdpa/vdpa_blk_compact.h create mode 100644 examples/vdpa/vhost_user.h -- 1.8.3.1
[PATCH 01/15] vdpa/ifc: add support for virtio blk device
Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id. Blk and net device are implemeted with proper feature and ops. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 16 +++- drivers/vdpa/ifc/ifcvf_vdpa.c | 96 +++ 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 573a35f..01522c6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -5,8 +5,17 @@ #ifndef _IFCVF_H_ #define _IFCVF_H_ +#include #include "ifcvf_osdep.h" +#define IFCVF_NET 0 +#define IFCVF_BLK 1 + +/* for BLK */ +#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 +#define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_BLK_DEVICE_ID 0x0002 + #define IFCVF_VENDOR_ID0x1AF4 #define IFCVF_DEVICE_ID0x1041 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 @@ -57,7 +66,6 @@ #define IFCVF_32_BIT_MASK 0x - struct ifcvf_pci_cap { u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */ u8 cap_next;/* Generic PCI field: next ptr. */ @@ -126,7 +134,11 @@ struct ifcvf_hw { u8 notify_region; u32notify_off_multiplier; struct ifcvf_pci_common_cfg *common_cfg; - struct ifcvf_net_config *dev_cfg; + union { + struct ifcvf_net_config *net_cfg; + struct virtio_blk_config *blk_cfg; + void *dev_cfg; + }; u8 *isr; u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 3853c4c..48056d1 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -58,6 +58,7 @@ struct ifcvf_internal { struct rte_vdpa_device *vdev; uint16_t max_queues; uint64_t features; + int device_type; rte_atomic32_t started; rte_atomic32_t dev_attached; rte_atomic32_t running; @@ -75,6 +76,14 @@ struct internal_list { struct ifcvf_internal *internal; }; +/** +** vdpa decice info includes device features and devcic operation. 
+**/ +struct rte_vdpa_dev_info { + uint64_t features; + struct rte_vdpa_dev_ops *ops; +}; + TAILQ_HEAD(internal_list_head, internal_list); static struct internal_list_head internal_list = TAILQ_HEAD_INITIALIZER(internal_list); @@ -1170,6 +1179,50 @@ struct internal_list { return 0; } +static int16_t +ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev) +{ + uint16_t pci_device_id = pci_dev->id.device_id; + uint16_t device_id; + + if (pci_device_id < 0x1000 || pci_device_id > 0x107f) { + DRV_LOG(ERR, "Probe device is not a virtio device\n"); + return -1; + } + + if (pci_device_id < 0x1040) { + /** + ** Transitional devices: use the PCI subsystem device id as + ** virtio device id, same as legacy driver always did. + **/ + device_id = pci_dev->id.subsystem_device_id; + } else { + /** + ** Modern devices: simply use PCI device id, + ** but start from 0x1040. + **/ + device_id = pci_device_id - 0x1040; + } + + return device_id; +} + +struct rte_vdpa_dev_info dev_info[] = { + { + .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | + (1ULL << VIRTIO_NET_F_CTRL_VQ) | + (1ULL << VIRTIO_NET_F_STATUS) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << VHOST_F_LOG_ALL), + .ops = &ifcvf_ops, + }, + { + .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << VHOST_F_LOG_ALL), + .ops = NULL, + }, +}; + static int ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev) @@ -1181,6 +1234,7 @@ struct internal_list { int sw_fallback_lm = 0; struct rte_kvargs *kvlist = NULL; int ret = 0; + int16_t device_id; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1230,13 +1284,24 @@ struct internal_list { internal->configured = 0; internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); - internal->features = (features & - ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) | - (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | - (1ULL << VIRTIO_NET_F_CTRL_VQ) | - (1ULL << VIRTIO_NET_F_STATUS
[PATCH 02/15] vhost: add vdpa ops for blk device
Get_config and set_config are necessary ops for blk device. Add get_config and set_config ops to vdpa ops. Signed-off-by: Andy Pei --- lib/vhost/vdpa_driver.h | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index fc2d6ac..9a23db9 100644 --- a/lib/vhost/vdpa_driver.h +++ b/lib/vhost/vdpa_driver.h @@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops { /** Reset statistics of the queue */ int (*reset_stats)(struct rte_vdpa_device *dev, int qid); - /** Reserved for future extension */ - void *reserved[2]; + /** Get the device configuration space */ + int (*get_config)(int vid, uint8_t *config, uint32_t len); + + /** Set the device configuration space */ + int (*set_config)(int vid, uint8_t *config, uint32_t offset, + uint32_t size, uint32_t flags); }; /** -- 1.8.3.1
[PATCH 03/15] vdpa/ifc: add blk ops for ifc device
For virtio blk device, re-use part of ifc driver ops. Implement ifcvf_blk_get_config for virtio blk device. Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 4 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 88 ++- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 01522c6..769c603 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -66,6 +66,10 @@ #define IFCVF_32_BIT_MASK 0x +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + struct ifcvf_pci_cap { u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */ u8 cap_next;/* Generic PCI field: next ptr. */ diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 48056d1..965baa2 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1093,6 +1093,10 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) + +#define VDPA_BLK_PROTOCOL_FEATURES \ + (1ULL << VHOST_USER_PROTOCOL_F_CONFIG) + static int ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features) { @@ -1207,6 +1211,88 @@ struct rte_vdpa_dev_info { return device_id; } +static int +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len) +{ + struct virtio_blk_config *dev_cfg; + struct ifcvf_internal *internal; + struct rte_vdpa_device *vdev; + struct internal_list *list; + uint32_t i; + __u64 capacity = 0; + uint8_t *byte; + + if (len < sizeof(struct virtio_blk_config)) { + DRV_LOG(ERR, "Invalid len: %u, required: %lu", + len, sizeof(struct virtio_blk_config)); + return -1; + } + + vdev = rte_vhost_get_vdpa_device(vid); + list = find_internal_resource_by_vdev(vdev); + if (list == NULL) { + DRV_LOG(ERR, "Invalid vDPA device: %p", vdev); + return -1; + } + + internal = list->internal; + 
+ for (i = 0; i < sizeof(struct virtio_blk_config); i++) + config[i] = *((u8 *)internal->hw.blk_cfg + i); + + dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg; + + /** + ** cannot read 64-bit register in one attempt, + ** so read byte by byte. + **/ + for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) { + byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i; + capacity |= (__u64)*byte << (i * 8); + } + DRV_LOG(INFO, "capacity : %quG", capacity >> 21); + + DRV_LOG(INFO, "size_max : 0x%08x", dev_cfg->size_max); + DRV_LOG(INFO, "seg_max : 0x%08x", dev_cfg->seg_max); + DRV_LOG(INFO, "blk_size : 0x%08x", dev_cfg->blk_size); + DRV_LOG(INFO, "geometry"); + DRV_LOG(INFO, " cylinders: %u", dev_cfg->geometry.cylinders); + DRV_LOG(INFO, " heads: %u", dev_cfg->geometry.heads); + DRV_LOG(INFO, " sectors : %u", dev_cfg->geometry.sectors); + DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues); + + DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n", + config[0], config[1], config[2], config[3], config[4], + config[5], config[6], config[7]); + return 0; +} + +static int +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev, + uint64_t *features) +{ + RTE_SET_USED(vdev); + + *features = VDPA_SUPPORTED_PROTOCOL_FEATURES; + *features |= VDPA_BLK_PROTOCOL_FEATURES; + return 0; +} + +static struct rte_vdpa_dev_ops ifcvf_blk_ops = { + .get_queue_num = ifcvf_get_queue_num, + .get_features = ifcvf_get_vdpa_features, + .set_features = ifcvf_set_features, + .get_protocol_features = ifcvf_blk_get_protocol_features, + .dev_conf = ifcvf_dev_config, + .dev_close = ifcvf_dev_close, + .set_vring_state = NULL, + .migration_done = NULL, + .get_vfio_group_fd = ifcvf_get_vfio_group_fd, + .get_vfio_device_fd = ifcvf_get_vfio_device_fd, + .get_notify_area = ifcvf_get_notify_area, + .get_config = ifcvf_blk_get_config, +}; + struct rte_vdpa_dev_info dev_info[] = { { .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | @@ -1219,7 +1305,7 @@ struct 
rte_vdpa_dev_info dev_info[] = { { .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | (1ULL << VHOST_F_LOG_ALL), - .ops = NULL, + .ops = &ifcvf_blk_ops, }, }; -- 1.8.3.1
[PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device
For the blk we need to relay all the cmd of each queue. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 48 +-- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 965baa2..9729490 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -374,24 +374,50 @@ struct rte_vdpa_dev_info { irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; fd_ptr = (int *)&irq_set->data; + /* The first interrupt is for the configure space change notification */ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(internal->pdev->intr_handle); for (i = 0; i < nr_vring; i++) internal->intr_fd[i] = -1; - for (i = 0; i < nr_vring; i++) { - rte_vhost_get_vhost_vring(internal->vid, i, &vring); - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; - if ((i & 1) == 0 && m_rx == true) { - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - DRV_LOG(ERR, "can't setup eventfd: %s", - strerror(errno)); - return -1; + if (internal->device_type == IFCVF_NET) { + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vring); + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + if ((i & 1) == 0 && m_rx == true) { + /** + ** For the net we only need to relay rx queue, + ** which will change the mem of VM. 
+ **/ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + DRV_LOG(ERR, "can't setup eventfd: %s", + strerror(errno)); + return -1; + } + internal->intr_fd[i] = fd; + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; + } + } + } else if (internal->device_type == IFCVF_BLK) { + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vring); + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + if (m_rx == true) { + /** + ** For the blk we need to relay all the read cmd + ** of each queue + **/ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + DRV_LOG(ERR, "can't setup eventfd: %s", + strerror(errno)); + return -1; + } + internal->intr_fd[i] = fd; + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; } - internal->intr_fd[i] = fd; - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; } } -- 1.8.3.1
[PATCH 05/15] vdpa/ifc: add blk dev sw live migration
Enable virtio blk sw live migration relay callfd and log the dirty page. In this version we ignore the write cmd and still mark it dirty. Maybe we can improve it later. Signed-off-by: Jin Yu Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 4 +- drivers/vdpa/ifc/base/ifcvf.h | 6 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 130 +++--- 3 files changed, 118 insertions(+), 22 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..3a69e53 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -189,7 +189,7 @@ IFCVF_WRITE_REG32(val >> 32, hi); } -STATIC int +int ifcvf_hw_enable(struct ifcvf_hw *hw) { struct ifcvf_pci_common_cfg *cfg; @@ -238,7 +238,7 @@ return 0; } -STATIC void +void ifcvf_hw_disable(struct ifcvf_hw *hw) { u32 i; diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -179,4 +179,10 @@ struct ifcvf_hw { u64 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid); +int +ifcvf_hw_enable(struct ifcvf_hw *hw); + +void +ifcvf_hw_disable(struct ifcvf_hw *hw); + #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 9729490..1f832a3 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -334,10 +334,68 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &features); if (RTE_VHOST_NEED_LOG(features)) { - ifcvf_disable_logging(hw); - rte_vhost_get_log_base(internal->vid, &log_base, &log_size); - rte_vfio_container_dma_unmap(internal->vfio_container_fd, - log_base, IFCVF_LOG_BASE, log_size); + if (internal->device_type == IFCVF_NET) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, + &log_size); + rte_vfio_container_dma_unmap( + internal->vfio_container_fd, log_base, + IFCVF_LOG_BASE, log_size); + } + /** + ** IFCVF marks dirty memory pages for 
only packet buffer, + ** SW helps to mark the used ring as dirty after device stops. + **/ + for (i = 0; i < hw->nr_vring; i++) { + len = IFCVF_USED_RING_LEN(hw->vring[i].size); + rte_vhost_log_used_vring(vid, i, 0, len); + } + } +} + +static void +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) +{ + struct ifcvf_hw *hw = &internal->hw; + struct rte_vhost_vring vq; + int i, vid; + uint64_t features = 0; + uint64_t log_base = 0, log_size = 0; + uint64_t len; + + vid = internal->vid; + + if (internal->device_type == IFCVF_BLK) { + for (i = 0; i < hw->nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vq); + while (vq.avail->idx != vq.used->idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + hw->vring[i].last_avail_idx = vq.avail->idx; + hw->vring[i].last_used_idx = vq.used->idx; + } + } + + ifcvf_hw_disable(hw); + + for (i = 0; i < hw->nr_vring; i++) + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, + hw->vring[i].last_used_idx); + + if (internal->sw_lm) + return; + + rte_vhost_get_negotiated_features(vid, &features); + if (RTE_VHOST_NEED_LOG(features)) { + if (internal->device_type == IFCVF_NET) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, + &log_size); + rte_vfio_container_dma_unmap( + internal->vfio_container_fd, log_base, + IFCVF_LOG_BASE, log_size); + } /* * IFCVF marks dirty memory pages for only packet buffer, * SW helps to mark the used ring as dirty after device stops. @@ -665,15 +723,18 @@ struct rte_vdpa_dev_info { } hw->vring[i].avail = gpa; - /* Direct I/O for Tx queue, relay for Rx queue */ - if (i & 1) { + /** + ** NETWORK: Direct I/O for Tx queue, relay for
[PATCH 06/15] example/vdpa: add vdpa blk support in example
Signed-off-by: Andy Pei --- examples/vdpa/Makefile | 2 +- examples/vdpa/main.c | 8 ++ examples/vdpa/meson.build| 1 + examples/vdpa/vdpa_blk_compact.c | 152 +++ examples/vdpa/vdpa_blk_compact.h | 118 examples/vdpa/vhost_user.h | 190 +++ 6 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 examples/vdpa/vdpa_blk_compact.c create mode 100644 examples/vdpa/vdpa_blk_compact.h create mode 100644 examples/vdpa/vhost_user.h diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile index d974db4..9d0479b 100644 --- a/examples/vdpa/Makefile +++ b/examples/vdpa/Makefile @@ -5,7 +5,7 @@ APP = vdpa # all source are stored in SRCS-y -SRCS-y := main.c +SRCS-y := main.c vdpa_blk_compact.c CFLAGS += -DALLOW_EXPERIMENTAL_API PKGCONF ?= pkg-config diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index 5ab0765..3fa3d3a 100644 --- a/examples/vdpa/main.c +++ b/examples/vdpa/main.c @@ -20,6 +20,7 @@ #include #include #include +#include "vdpa_blk_compact.h" #define MAX_PATH_LEN 128 #define MAX_VDPA_SAMPLE_PORTS 1024 @@ -156,6 +157,7 @@ struct vdpa_port { static const struct rte_vhost_device_ops vdpa_sample_devops = { .new_device = new_device, .destroy_device = destroy_device, + .new_connection = rte_vhost_blk_session_install_rte_compat_hooks, }; static int @@ -192,6 +194,12 @@ struct vdpa_port { "attach vdpa device failed: %s\n", socket_path); + if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev) + < 0) + rte_exit(EXIT_FAILURE, + "set vhost blk driver features and protocal features failed: %s\n", + socket_path); + if (rte_vhost_driver_start(socket_path) < 0) rte_exit(EXIT_FAILURE, "start vhost driver failed: %s\n", diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build index bd08605..f0d111c 100644 --- a/examples/vdpa/meson.build +++ b/examples/vdpa/meson.build @@ -15,4 +15,5 @@ deps += 'vhost' allow_experimental_apis = true sources = files( 'main.c', + 'vdpa_blk_compact.c', ) diff --git a/examples/vdpa/vdpa_blk_compact.c 
b/examples/vdpa/vdpa_blk_compact.c new file mode 100644 index 000..7310ebb --- /dev/null +++ b/examples/vdpa/vdpa_blk_compact.c @@ -0,0 +1,152 @@ +/* +**INTEL CONFIDENTIAL +** +**Copyright (c) Intel Corporation. +**All rights reserved. +** +**The source code contained or described herein and all documents related +**to the source code ("Material") are owned by Intel Corporation or its +**suppliers or licensors. Title to the Material remains with Intel +**Corporation or its suppliers and licensors. The Material contains trade +**secrets and proprietary and confidential information of Intel or its +**suppliers and licensors. The Material is protected by worldwide +**copyright and trade secret laws and treaty provisions. No part of the +**Material may be used, copied, reproduced, modified, published, uploaded, +**posted, transmitted, distributed, or disclosed in any way without Intel's +**prior express written permission. +** +**No license under any patent, copyright, trade secret or other +**intellectual property right is granted to or conferred upon you by +**disclosure or delivery of the Materials, either expressly, by +**implication, inducement, estoppel or otherwise. Any license under such +**intellectual property rights must be express and approved by Intel in +**writing. 
+*/ + +/** +** @file +** +** Block device specific vhost lib +**/ + +#include + +#include +#include +#include +#include "vdpa_blk_compact.h" +#include "vhost_user.h" + +#define VHOST_USER_GET_CONFIG 24 +#define VHOST_USER_SET_CONFIG 25 + +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + +/* + * Function to handle vhost user blk message + */ +static enum rte_vhost_msg_result +rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg) +{ + struct VhostUserMsg *msg = _msg; + struct rte_vdpa_device *vdev = NULL; + + vdev = rte_vhost_get_vdpa_device(vid); + if (vdev == NULL) + return RTE_VHOST_MSG_RESULT_ERR; + + fprintf(stderr, "msg is %d\n", msg->request.master); + switch (msg->request.master) { + case VHOST_USER_GET_CONFIG: { + int rc = 0; + + fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n"); + + if (vdev->ops->get_config) { + fprintf(stdout, "get_config() fun
[PATCH 07/15] usertools: add support for virtio blk device
Signed-off-by: Andy Pei --- usertools/dpdk-devbind.py | 8 1 file changed, 8 insertions(+) diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py index ace4627..cbe336f 100755 --- a/usertools/dpdk-devbind.py +++ b/usertools/dpdk-devbind.py @@ -14,6 +14,8 @@ from os.path import join as path_join # The PCI base class for all devices +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001', +'SVendor': '8086', 'SDevice': '0002'} network_class = {'Class': '02', 'Vendor': None, 'Device': None, 'SVendor': None, 'SDevice': None} acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None, @@ -72,6 +74,7 @@ cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4', 'SVendor': None, 'SDevice': None} +virtio_blk_devices = [virtio_blk_class] network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class] baseband_devices = [acceleration_class] crypto_devices = [encryption_class, intel_processor_class] @@ -587,6 +590,9 @@ def show_status(): Displays to the user what devices are bound to the igb_uio driver, the kernel driver or to no driver''' +if status_dev in ["virtio_blk", "all"]: +show_device_status(virtio_blk_devices, "virtio_blk") + if status_dev in ["net", "all"]: show_device_status(network_devices, "Network", if_field=True) @@ -746,6 +752,7 @@ def do_arg_actions(): if b_flag is not None: clear_data() # refresh if we have changed anything +get_device_details(virtio_blk_devices) get_device_details(network_devices) get_device_details(baseband_devices) get_device_details(crypto_devices) @@ -769,6 +776,7 @@ def main(): parse_args() check_modules() clear_data() +get_device_details(virtio_blk_devices) get_device_details(network_devices) get_device_details(baseband_devices) get_device_details(crypto_devices) -- 1.8.3.1
[PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 1f832a3..eff6ff3 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1384,6 +1384,16 @@ struct rte_vdpa_dev_info { } static int +ifcvf_blk_set_vring_state(int vid, int vring, int state) +{ + RTE_SET_USED(vid); + RTE_SET_USED(vring); + RTE_SET_USED(state); + + return 0; +} + +static int ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features) { @@ -1401,7 +1411,7 @@ struct rte_vdpa_dev_info { .get_protocol_features = ifcvf_blk_get_protocol_features, .dev_conf = ifcvf_dev_config, .dev_close = ifcvf_dev_close, - .set_vring_state = NULL, + .set_vring_state = ifcvf_blk_set_vring_state, .migration_done = NULL, .get_vfio_group_fd = ifcvf_get_vfio_group_fd, .get_vfio_device_fd = ifcvf_get_vfio_device_fd, -- 1.8.3.1
[PATCH 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect
Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index eff6ff3..0b4b77f 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1447,6 +1447,9 @@ struct rte_vdpa_dev_info dev_info[] = { struct rte_kvargs *kvlist = NULL; int ret = 0; int16_t device_id; + __u64 capacity = 0; + uint8_t *byte; + uint32_t i; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1513,6 +1516,32 @@ struct rte_vdpa_dev_info dev_info[] = { internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; + + /** + ** cannot read 64-bit register in one attempt, + ** so read byte by byte. + **/ + for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) { + byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i; + capacity |= (__u64)*byte << (i * 8); + } + DRV_LOG(INFO, "capacity : %quG", capacity >> 21); + + DRV_LOG(INFO, "size_max : 0x%08x", + internal->hw.blk_cfg->size_max); + DRV_LOG(INFO, "seg_max : 0x%08x", + internal->hw.blk_cfg->seg_max); + DRV_LOG(INFO, "blk_size : 0x%08x", + internal->hw.blk_cfg->blk_size); + DRV_LOG(INFO, "geometry"); + DRV_LOG(INFO, "cylinders: %u", + internal->hw.blk_cfg->geometry.cylinders); + DRV_LOG(INFO, "heads: %u", + internal->hw.blk_cfg->geometry.heads); + DRV_LOG(INFO, "sectors : %u", + internal->hw.blk_cfg->geometry.sectors); + DRV_LOG(INFO, "num_queues: 0x%08x", + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware
The original code sets max_queues to IFCVF_MAX_QUEUES. The new code sets max_queues to the minimum of IFCVF_MAX_QUEUES and the hardware num_queues. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 0b4b77f..f092aca 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1542,6 +1542,10 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(INFO, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + /* reset max_queue here, to minimum modification */ + internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
Create a thread to poll and relay config space change interrupt. Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to info qemu. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++ 1 file changed, 113 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index f092aca..2552375 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -53,7 +53,9 @@ struct ifcvf_internal { int vfio_group_fd; int vfio_dev_fd; pthread_t tid; /* thread for notify relay */ + pthread_t intr_tid; /* thread for intr relay */ int epfd; + int csc_fd; int vid; struct rte_vdpa_device *vdev; uint16_t max_queues; @@ -622,6 +624,108 @@ struct rte_vdpa_dev_info { return 0; } +static void +virtio_interrupt_handler(struct ifcvf_internal *internal) +{ + int vid = internal->vid; + int ret; + + ret = rte_vhost_slave_config_change(vid, 1); + if (ret) + DRV_LOG(ERR, "failed to notify the guest about configuration space change."); + + return; +} + +static void * +intr_relay(void *arg) +{ + struct ifcvf_internal *internal = (struct ifcvf_internal *)arg; + struct epoll_event csc_event; + struct epoll_event ev; + uint64_t buf; + int nbytes; + int csc_fd, csc_val = 0; + + csc_fd = epoll_create(1); + if (csc_fd < 0) { + DRV_LOG(ERR, "failed to create epoll for config space change."); + return NULL; + } + + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; + ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle); + if (epoll_ctl(csc_fd, EPOLL_CTL_ADD, + rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) { + DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); + return NULL; + } + + internal->csc_fd = csc_fd; + + for (;;) { + csc_val = epoll_wait(csc_fd, &csc_event, 1, -1); + if (csc_val < 0) { + if (errno == EINTR) + continue; + DRV_LOG(ERR, "epoll_wait return fail\n"); + return NULL; + } else if (csc_val == 0) { + continue; + } else { + /* csc_val > 0 */ + nbytes = read(csc_event.data.fd, &buf, 8); + if (nbytes < 0) { + if (errno == 
EINTR || errno == EWOULDBLOCK) + continue; + DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n", + csc_event.data.fd, + strerror(errno)); + return NULL; + } else if (nbytes == 0) { + DRV_LOG(ERR, "Read nothing from file descriptor %d\n", + csc_event.data.fd); + continue; + } else { + virtio_interrupt_handler(internal); + } + } + } + return NULL; +} + +static int +setup_intr_relay(struct ifcvf_internal *internal) +{ + int ret; + + ret = pthread_create(&internal->intr_tid, NULL, intr_relay, + (void *)internal); + if (ret) { + DRV_LOG(ERR, "failed to create notify relay pthread."); + return -1; + } + return 0; +} + +static int +unset_intr_relay(struct ifcvf_internal *internal) +{ + void *status; + + if (internal->intr_tid) { + pthread_cancel(internal->intr_tid); + pthread_join(internal->intr_tid, &status); + } + internal->intr_tid = 0; + + if (internal->csc_fd >= 0) + close(internal->csc_fd); + internal->csc_fd = -1; + + return 0; +} + static int update_datapath(struct ifcvf_internal *internal) { @@ -648,10 +752,16 @@ struct rte_vdpa_dev_info { if (ret) goto err; + ret = setup_intr_relay(internal); + if (ret) + goto err; + rte_atomic32_set(&internal->running, 1); } else if (rte_atomic32_read(&internal->running) && (!rte_atomic32_read(&internal->started) || !rte_atomic32_read(&internal->dev_attached))) { + ret = unset_intr_relay(internal); + ret = unset_notify_relay(internal); if (ret) goto
[PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 1 + drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 6dd7925..8e602af 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -149,6 +149,7 @@ struct ifcvf_hw { u8 *lm_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; + u8 is_blk; struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE]; }; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 2552375..546f9bd 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1621,11 +1621,13 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->device_type = IFCVF_NET; + internal->hw.is_blk = IFCVF_NET; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; } else if (device_id == VIRTIO_ID_BLOCK) { internal->device_type = IFCVF_BLK; + internal->hw.is_blk = IFCVF_BLK; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; -- 1.8.3.1
[PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
1.last_avail_idx is lower 16 bit of the register. 2.address of ring_state register is different between net and blk device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 36 +--- drivers/vdpa/ifc/base/ifcvf.h | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 3a69e53..a8a4728 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -216,10 +216,18 @@ &cfg->queue_used_hi); IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); + if (hw->is_blk == IFCVF_BLK) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } else if (hw->is_blk == IFCVF_NET) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -252,9 +260,23 @@ IFCVF_WRITE_REG16(i, &cfg->queue_select); IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); - ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4); - hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + + if (hw->is_blk) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + } else if (hw->is_blk == IFCVF_NET) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + } + + if (hw->is_blk == IFCVF_BLK) + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + else if (hw->is_blk == IFCVF_NET) 
+ hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); hw->vring[i].last_used_idx = (u16)(ring_state >> 16); } } diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 8e602af..7367094 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -65,6 +65,7 @@ #define IFCVF_MEDIATED_VRING 0x2000 #define IFCVF_32_BIT_MASK 0x +#define IFCVF_16_BIT_MASK 0x #ifndef VHOST_USER_PROTOCOL_F_CONFIG #define VHOST_USER_PROTOCOL_F_CONFIG 9 -- 1.8.3.1
[PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pauses
Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 2 +- drivers/vdpa/ifc/base/ifcvf.h | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index a8a4728..7018048 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -116,7 +116,7 @@ IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status); } -STATIC void +void ifcvf_reset(struct ifcvf_hw *hw) { ifcvf_set_status(hw, 0); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 7367094..f22d18b 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -157,6 +157,9 @@ struct ifcvf_hw { int ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev); +void +ifcvf_reset(struct ifcvf_hw *hw); + u64 ifcvf_get_features(struct ifcvf_hw *hw); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 546f9bd..ff233bc 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -359,23 +359,32 @@ struct rte_vdpa_dev_info { vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) { struct ifcvf_hw *hw = &internal->hw; - struct rte_vhost_vring vq; int i, vid; uint64_t features = 0; uint64_t log_base = 0, log_size = 0; uint64_t len; + u32 ring_state = 0; vid = internal->vid; if (internal->device_type == IFCVF_BLK) { for (i = 0; i < hw->nr_vring; i++) { - rte_vhost_get_vhost_vring(internal->vid, i, &vq); - while (vq.avail->idx != vq.used->idx) { - ifcvf_notify_queue(hw, i); - usleep(10); - } - hw->vring[i].last_avail_idx = vq.avail->idx; - hw->vring[i].last_used_idx = vq.used->idx; + do { + if (hw->lm_cfg != NULL) + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + hw->vring[i].last_used_idx = + (u16)(ring_state >> 16); + if (hw->vring[i].last_avail_idx != + 
hw->vring[i].last_used_idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + } while (hw->vring[i].last_avail_idx != + hw->vring[i].last_used_idx); } } @@ -766,7 +775,12 @@ struct rte_vdpa_dev_info { if (ret) goto err; - vdpa_ifcvf_stop(internal); + if (internal->device_type == IFCVF_BLK) { + vdpa_ifcvf_blk_pause(internal); + ifcvf_reset(&internal->hw); + } else { + vdpa_ifcvf_stop(internal); + } ret = vdpa_disable_vfio_intr(internal); if (ret) -- 1.8.3.1
[PATCH 15/15] vhost: make sure each queue callfd is configured
During the vhost data path building process, qemu creates a call fd at first and creates another call fd at the end. The final call fd is the one used to relay notifications. In the original code, after the kick fd is set, dev_conf sets up the first call fd. Even though the actual call fd is set later, the data path will not work correctly. Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 5eb1dd6..0be879a 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -3137,12 +3137,27 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev, if (!vdpa_dev) goto out; + if (request != VHOST_USER_SET_VRING_CALL) + goto out; + if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) VHOST_LOG_CONFIG(ERR, "Failed to configure vDPA device\n"); else dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + } else { + /** + ** when VIRTIO_DEV_VDPA_CONFIGURED already configured + ** close the device and config the device again, + ** make sure the call fd of each queue is configed correctly. + **/ + if (vdpa_dev->ops->dev_close(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to close vDPA device\n"); + if (vdpa_dev->ops->dev_conf(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to re-config vDPA device\n"); } out: -- 1.8.3.1
[PATCH v2 00/15] add virtio_blk device support to vdpa/ifc
This patch set adds virtio_blk device support to the vdpa/ifc driver. Since the net and blk devices have a lot in common, part of the vdpa/ifc driver is re-used. The virtio net and blk devices are distinguished by device id, and specific features and ops are implemented for each. An example is added to vdpa to support the virtio_blk device. To support blk device live migration, some modifications are made to the vhost lib. The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg. v2: Fix some coding style issues. Andy Pei (15): vdpa/ifc: add support for virtio blk device vhost: add vdpa ops for blk device vdpa/ifc: add blk ops for ifc device vdpa/ifc: add vdpa interrupt for blk device vdpa/ifc: add blk dev sw live migration example/vdpa:add vdpa blk support in example usertools: add support for virtio blk device vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device vdpa/ifc: add some log at VDPA launch before qemu connect vdpa/ifc: read virtio max_queues from hardware vdpa: add config space change interrupt register and handle for virtio_blk vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe vdpa/ifc/base: for blk device, live migration register is different from net device vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause vhost: make sure each queue callfd is configured drivers/vdpa/ifc/base/ifcvf.c| 42 +++- drivers/vdpa/ifc/base/ifcvf.h| 29 ++- drivers/vdpa/ifc/ifcvf_vdpa.c| 520 --- examples/vdpa/Makefile | 2 +- examples/vdpa/main.c | 8 + examples/vdpa/meson.build| 1 + examples/vdpa/vdpa_blk_compact.c | 150 +++ examples/vdpa/vdpa_blk_compact.h | 117 + examples/vdpa/vhost_user.h | 189 ++ lib/vhost/vdpa_driver.h | 8 +- lib/vhost/vhost_user.c | 14 ++ usertools/dpdk-devbind.py| 8 + 12 files changed, 1034 insertions(+), 54 deletions(-) create mode 100644 examples/vdpa/vdpa_blk_compact.c create mode 100644 examples/vdpa/vdpa_blk_compact.h create mode 100644 examples/vdpa/vhost_user.h -- 1.8.3.1
[PATCH v2 01/15] vdpa/ifc: add support for virtio blk device
Re-use the vdpa/ifc code, distinguish blk and net device by pci_device_id. Blk and net device are implemented with proper feature and ops. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 16 +++- drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++ 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 573a35f..01522c6 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -5,8 +5,17 @@ #ifndef _IFCVF_H_ #define _IFCVF_H_ +#include #include "ifcvf_osdep.h" +#define IFCVF_NET 0 +#define IFCVF_BLK 1 + +/* for BLK */ +#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001 +#define IFCVF_BLK_MODERN_DEVICE_ID 0x1042 +#define IFCVF_BLK_DEVICE_ID 0x0002 + #define IFCVF_VENDOR_ID0x1AF4 #define IFCVF_DEVICE_ID0x1041 #define IFCVF_SUBSYS_VENDOR_ID 0x8086 @@ -57,7 +66,6 @@ #define IFCVF_32_BIT_MASK 0x - struct ifcvf_pci_cap { u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */ u8 cap_next;/* Generic PCI field: next ptr. */ @@ -126,7 +134,11 @@ struct ifcvf_hw { u8 notify_region; u32notify_off_multiplier; struct ifcvf_pci_common_cfg *common_cfg; - struct ifcvf_net_config *dev_cfg; + union { + struct ifcvf_net_config *net_cfg; + struct virtio_blk_config *blk_cfg; + void *dev_cfg; + }; u8 *isr; u16*notify_base; u16*notify_addr[IFCVF_MAX_QUEUES * 2]; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 3853c4c..96b67dd 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -58,6 +58,7 @@ struct ifcvf_internal { struct rte_vdpa_device *vdev; uint16_t max_queues; uint64_t features; + int device_type; rte_atomic32_t started; rte_atomic32_t dev_attached; rte_atomic32_t running; @@ -75,6 +76,12 @@ struct internal_list { struct ifcvf_internal *internal; }; +/* vdpa device info includes device features and devcic operation. 
*/ +struct rte_vdpa_dev_info { + uint64_t features; + struct rte_vdpa_dev_ops *ops; +}; + TAILQ_HEAD(internal_list_head, internal_list); static struct internal_list_head internal_list = TAILQ_HEAD_INITIALIZER(internal_list); @@ -1170,6 +1177,48 @@ struct internal_list { return 0; } +static int16_t +ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev) +{ + uint16_t pci_device_id = pci_dev->id.device_id; + uint16_t device_id; + + if (pci_device_id < 0x1000 || pci_device_id > 0x107f) { + DRV_LOG(ERR, "Probe device is not a virtio device\n"); + return -1; + } + + if (pci_device_id < 0x1040) { + /* Transitional devices: use the PCI subsystem device id as +* virtio device id, same as legacy driver always did. +*/ + device_id = pci_dev->id.subsystem_device_id; + } else { + /* Modern devices: simply use PCI device id, +* but start from 0x1040. +*/ + device_id = pci_device_id - 0x1040; + } + + return device_id; +} + +struct rte_vdpa_dev_info dev_info[] = { + { + .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | + (1ULL << VIRTIO_NET_F_CTRL_VQ) | + (1ULL << VIRTIO_NET_F_STATUS) | + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << VHOST_F_LOG_ALL), + .ops = &ifcvf_ops, + }, + { + .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | + (1ULL << VHOST_F_LOG_ALL), + .ops = NULL, + }, +}; + static int ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev) @@ -1181,6 +1230,7 @@ struct internal_list { int sw_fallback_lm = 0; struct rte_kvargs *kvlist = NULL; int ret = 0; + int16_t device_id; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1230,13 +1280,24 @@ struct internal_list { internal->configured = 0; internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); - internal->features = (features & - ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) | - (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | - (1ULL << VIRTIO_NET_F_CTRL_VQ) | - (1ULL << VIRTIO_NET_F_STATUS) | - (1ULL << VHOST_USER_F_P
[PATCH v2 02/15] vhost: add vdpa ops for blk device
Get_config and set_config are necessary ops for blk device. Add get_config and set_config ops to vdpa ops. Signed-off-by: Andy Pei --- lib/vhost/vdpa_driver.h | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h index fc2d6ac..9a23db9 100644 --- a/lib/vhost/vdpa_driver.h +++ b/lib/vhost/vdpa_driver.h @@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops { /** Reset statistics of the queue */ int (*reset_stats)(struct rte_vdpa_device *dev, int qid); - /** Reserved for future extension */ - void *reserved[2]; + /** Get the device configuration space */ + int (*get_config)(int vid, uint8_t *config, uint32_t len); + + /** Set the device configuration space */ + int (*set_config)(int vid, uint8_t *config, uint32_t offset, + uint32_t size, uint32_t flags); }; /** -- 1.8.3.1
[PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device
For virtio blk device, re-use part of ifc driver ops. Implement ifcvf_blk_get_config for virtio blk device. Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio blk device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 4 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 01522c6..769c603 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -66,6 +66,10 @@ #define IFCVF_32_BIT_MASK 0x +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + struct ifcvf_pci_cap { u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */ u8 cap_next;/* Generic PCI field: next ptr. */ diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 96b67dd..57fdd2c 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info { 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \ 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \ 1ULL << VHOST_USER_PROTOCOL_F_STATUS) + +#define VDPA_BLK_PROTOCOL_FEATURES \ + (1ULL << VHOST_USER_PROTOCOL_F_CONFIG) + static int ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features) { @@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info { return device_id; } +static int +ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len) +{ + struct virtio_blk_config *dev_cfg; + struct ifcvf_internal *internal; + struct rte_vdpa_device *vdev; + struct internal_list *list; + uint32_t i; + __u64 capacity = 0; + uint8_t *byte; + + if (len < sizeof(struct virtio_blk_config)) { + DRV_LOG(ERR, "Invalid len: %u, required: %lu", + len, sizeof(struct virtio_blk_config)); + return -1; + } + + vdev = rte_vhost_get_vdpa_device(vid); + list = find_internal_resource_by_vdev(vdev); + if (list == NULL) { + DRV_LOG(ERR, "Invalid vDPA device: %p", vdev); + return -1; + } + + internal = list->internal; + 
+ for (i = 0; i < sizeof(struct virtio_blk_config); i++) + config[i] = *((u8 *)internal->hw.blk_cfg + i); + + dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg; + + /* cannot read 64-bit register in one attempt, so read byte by byte. */ + for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) { + byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i; + capacity |= (__u64)*byte << (i * 8); + } + DRV_LOG(INFO, "capacity : %quG", capacity >> 21); + + DRV_LOG(INFO, "size_max : 0x%08x", dev_cfg->size_max); + DRV_LOG(INFO, "seg_max : 0x%08x", dev_cfg->seg_max); + DRV_LOG(INFO, "blk_size : 0x%08x", dev_cfg->blk_size); + DRV_LOG(INFO, "geometry"); + DRV_LOG(INFO, " cylinders: %u", dev_cfg->geometry.cylinders); + DRV_LOG(INFO, " heads: %u", dev_cfg->geometry.heads); + DRV_LOG(INFO, " sectors : %u", dev_cfg->geometry.sectors); + DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues); + + DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n", + config[0], config[1], config[2], config[3], config[4], + config[5], config[6], config[7]); + return 0; +} + +static int +ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev, + uint64_t *features) +{ + RTE_SET_USED(vdev); + + *features = VDPA_SUPPORTED_PROTOCOL_FEATURES; + *features |= VDPA_BLK_PROTOCOL_FEATURES; + return 0; +} + +static struct rte_vdpa_dev_ops ifcvf_blk_ops = { + .get_queue_num = ifcvf_get_queue_num, + .get_features = ifcvf_get_vdpa_features, + .set_features = ifcvf_set_features, + .get_protocol_features = ifcvf_blk_get_protocol_features, + .dev_conf = ifcvf_dev_config, + .dev_close = ifcvf_dev_close, + .set_vring_state = NULL, + .migration_done = NULL, + .get_vfio_group_fd = ifcvf_get_vfio_group_fd, + .get_vfio_device_fd = ifcvf_get_vfio_device_fd, + .get_notify_area = ifcvf_get_notify_area, + .get_config = ifcvf_blk_get_config, +}; + struct rte_vdpa_dev_info dev_info[] = { { .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | @@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info 
dev_info[] = { { .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | (1ULL << VHOST_F_LOG_ALL), - .ops = NULL, + .ops = &ifcvf_blk_ops, }, }; -- 1.8.3.1
[PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device
For the blk we need to relay all the cmd of each queue. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 46 --- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 57fdd2c..ef5b36c 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info { irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; irq_set->start = 0; fd_ptr = (int *)&irq_set->data; + /* The first interrupt is for the configure space change notification */ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(internal->pdev->intr_handle); for (i = 0; i < nr_vring; i++) internal->intr_fd[i] = -1; - for (i = 0; i < nr_vring; i++) { - rte_vhost_get_vhost_vring(internal->vid, i, &vring); - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; - if ((i & 1) == 0 && m_rx == true) { - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); - if (fd < 0) { - DRV_LOG(ERR, "can't setup eventfd: %s", - strerror(errno)); - return -1; + if (internal->device_type == IFCVF_NET) { + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vring); + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + if ((i & 1) == 0 && m_rx == true) { + /* For the net we only need to relay rx queue, +* which will change the mem of VM. 
+*/ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + DRV_LOG(ERR, "can't setup eventfd: %s", + strerror(errno)); + return -1; + } + internal->intr_fd[i] = fd; + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; + } + } + } else if (internal->device_type == IFCVF_BLK) { + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vring); + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; + if (m_rx == true) { + /* For the blk we need to relay all the read cmd +* of each queue +*/ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + DRV_LOG(ERR, "can't setup eventfd: %s", + strerror(errno)); + return -1; + } + internal->intr_fd[i] = fd; + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; } - internal->intr_fd[i] = fd; - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; } } -- 1.8.3.1
[PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration
Enable virtio blk sw live migration relay callfd and log the dirty page. In this version we ignore the write cmd and still mark it dirty. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 4 +- drivers/vdpa/ifc/base/ifcvf.h | 6 ++ drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++--- 3 files changed, 116 insertions(+), 22 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..3a69e53 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -189,7 +189,7 @@ IFCVF_WRITE_REG32(val >> 32, hi); } -STATIC int +int ifcvf_hw_enable(struct ifcvf_hw *hw) { struct ifcvf_pci_common_cfg *cfg; @@ -238,7 +238,7 @@ return 0; } -STATIC void +void ifcvf_hw_disable(struct ifcvf_hw *hw) { u32 i; diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -179,4 +179,10 @@ struct ifcvf_hw { u64 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid); +int +ifcvf_hw_enable(struct ifcvf_hw *hw); + +void +ifcvf_hw_disable(struct ifcvf_hw *hw); + #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index ef5b36c..14bc5c8 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info { rte_vhost_get_negotiated_features(vid, &features); if (RTE_VHOST_NEED_LOG(features)) { - ifcvf_disable_logging(hw); - rte_vhost_get_log_base(internal->vid, &log_base, &log_size); - rte_vfio_container_dma_unmap(internal->vfio_container_fd, - log_base, IFCVF_LOG_BASE, log_size); + if (internal->device_type == IFCVF_NET) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, + &log_size); + rte_vfio_container_dma_unmap( + internal->vfio_container_fd, log_base, + IFCVF_LOG_BASE, log_size); + } + /* IFCVF marks dirty memory pages for only packet buffer, +* SW helps to mark the used ring as 
dirty after device stops. +*/ + for (i = 0; i < hw->nr_vring; i++) { + len = IFCVF_USED_RING_LEN(hw->vring[i].size); + rte_vhost_log_used_vring(vid, i, 0, len); + } + } +} + +static void +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) +{ + struct ifcvf_hw *hw = &internal->hw; + struct rte_vhost_vring vq; + int i, vid; + uint64_t features = 0; + uint64_t log_base = 0, log_size = 0; + uint64_t len; + + vid = internal->vid; + + if (internal->device_type == IFCVF_BLK) { + for (i = 0; i < hw->nr_vring; i++) { + rte_vhost_get_vhost_vring(internal->vid, i, &vq); + while (vq.avail->idx != vq.used->idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + hw->vring[i].last_avail_idx = vq.avail->idx; + hw->vring[i].last_used_idx = vq.used->idx; + } + } + + ifcvf_hw_disable(hw); + + for (i = 0; i < hw->nr_vring; i++) + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, + hw->vring[i].last_used_idx); + + if (internal->sw_lm) + return; + + rte_vhost_get_negotiated_features(vid, &features); + if (RTE_VHOST_NEED_LOG(features)) { + if (internal->device_type == IFCVF_NET) { + ifcvf_disable_logging(hw); + rte_vhost_get_log_base(internal->vid, &log_base, + &log_size); + rte_vfio_container_dma_unmap( + internal->vfio_container_fd, log_base, + IFCVF_LOG_BASE, log_size); + } /* * IFCVF marks dirty memory pages for only packet buffer, * SW helps to mark the used ring as dirty after device stops. @@ -661,15 +718,17 @@ struct rte_vdpa_dev_info { } hw->vring[i].avail = gpa; - /* Direct I/O for Tx queue, relay for Rx queue */ - if (i & 1) { + /* NETWORK: Direct I/O for Tx queue, relay for Rx queue +* BLK: relay every queue +*/ + if ((i & 1) && (int
[PATCH v2 06/15] examples/vdpa: add vdpa blk support in example
Add virtio blk device support to vdpa example. Signed-off-by: Andy Pei --- examples/vdpa/Makefile | 2 +- examples/vdpa/main.c | 8 ++ examples/vdpa/meson.build| 1 + examples/vdpa/vdpa_blk_compact.c | 150 +++ examples/vdpa/vdpa_blk_compact.h | 117 examples/vdpa/vhost_user.h | 189 +++ 6 files changed, 466 insertions(+), 1 deletion(-) create mode 100644 examples/vdpa/vdpa_blk_compact.c create mode 100644 examples/vdpa/vdpa_blk_compact.h create mode 100644 examples/vdpa/vhost_user.h diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile index d974db4..9d0479b 100644 --- a/examples/vdpa/Makefile +++ b/examples/vdpa/Makefile @@ -5,7 +5,7 @@ APP = vdpa # all source are stored in SRCS-y -SRCS-y := main.c +SRCS-y := main.c vdpa_blk_compact.c CFLAGS += -DALLOW_EXPERIMENTAL_API PKGCONF ?= pkg-config diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index 5ab0765..924ad7b 100644 --- a/examples/vdpa/main.c +++ b/examples/vdpa/main.c @@ -20,6 +20,7 @@ #include #include #include +#include "vdpa_blk_compact.h" #define MAX_PATH_LEN 128 #define MAX_VDPA_SAMPLE_PORTS 1024 @@ -156,6 +157,7 @@ struct vdpa_port { static const struct rte_vhost_device_ops vdpa_sample_devops = { .new_device = new_device, .destroy_device = destroy_device, + .new_connection = rte_vhost_blk_session_install_rte_compat_hooks, }; static int @@ -192,6 +194,12 @@ struct vdpa_port { "attach vdpa device failed: %s\n", socket_path); + if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev) + < 0) + rte_exit(EXIT_FAILURE, + "set vhost blk driver features and protocol features failed: %s\n", + socket_path); + if (rte_vhost_driver_start(socket_path) < 0) rte_exit(EXIT_FAILURE, "start vhost driver failed: %s\n", diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build index bd08605..f0d111c 100644 --- a/examples/vdpa/meson.build +++ b/examples/vdpa/meson.build @@ -15,4 +15,5 @@ deps += 'vhost' allow_experimental_apis = true sources = files( 'main.c', + 'vdpa_blk_compact.c', ) diff
--git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c new file mode 100644 index 000..0c4d3ee --- /dev/null +++ b/examples/vdpa/vdpa_blk_compact.c @@ -0,0 +1,150 @@ +/*INTEL CONFIDENTIAL + * + *Copyright (c) Intel Corporation. + *All rights reserved. + * + *The source code contained or described herein and all documents related + *to the source code ("Material") are owned by Intel Corporation or its + *suppliers or licensors. Title to the Material remains with Intel + *Corporation or its suppliers and licensors. The Material contains trade + *secrets and proprietary and confidential information of Intel or its + *suppliers and licensors. The Material is protected by worldwide + *copyright and trade secret laws and treaty provisions. No part of the + *Material may be used, copied, reproduced, modified, published, uploaded, + *posted, transmitted, distributed, or disclosed in any way without Intel's + *prior express written permission. + * + *No license under any patent, copyright, trade secret or other + *intellectual property right is granted to or conferred upon you by + *disclosure or delivery of the Materials, either expressly, by + *implication, inducement, estoppel or otherwise. Any license under such + *intellectual property rights must be express and approved by Intel in + *writing. 
+ */ + +/* @file + * + * Block device specific vhost lib + */ + +#include + +#include +#include +#include +#include "vdpa_blk_compact.h" +#include "vhost_user.h" + +#define VHOST_USER_GET_CONFIG 24 +#define VHOST_USER_SET_CONFIG 25 + +#ifndef VHOST_USER_PROTOCOL_F_CONFIG +#define VHOST_USER_PROTOCOL_F_CONFIG 9 +#endif + +/* + * Function to handle vhost user blk message + */ +static enum rte_vhost_msg_result +rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg) +{ + struct VhostUserMsg *msg = _msg; + struct rte_vdpa_device *vdev = NULL; + + vdev = rte_vhost_get_vdpa_device(vid); + if (vdev == NULL) + return RTE_VHOST_MSG_RESULT_ERR; + + fprintf(stderr, "msg is %d\n", msg->request.master); + switch (msg->request.master) { + case VHOST_USER_GET_CONFIG: { + int rc = 0; + + fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n"); + + if (vdev->ops->get_config) { + fprintf(stdout, "get_config() fun
[PATCH v2 07/15] usertools: add support for virtio blk device
Add virtio blk device support to devbind. Signed-off-by: Andy Pei --- usertools/dpdk-devbind.py | 8 1 file changed, 8 insertions(+) diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py index ace4627..cbe336f 100755 --- a/usertools/dpdk-devbind.py +++ b/usertools/dpdk-devbind.py @@ -14,6 +14,8 @@ from os.path import join as path_join # The PCI base class for all devices +virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001', +'SVendor': '8086', 'SDevice': '0002'} network_class = {'Class': '02', 'Vendor': None, 'Device': None, 'SVendor': None, 'SDevice': None} acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None, @@ -72,6 +74,7 @@ cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4', 'SVendor': None, 'SDevice': None} +virtio_blk_devices = [virtio_blk_class] network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class] baseband_devices = [acceleration_class] crypto_devices = [encryption_class, intel_processor_class] @@ -587,6 +590,9 @@ def show_status(): Displays to the user what devices are bound to the igb_uio driver, the kernel driver or to no driver''' +if status_dev in ["virtio_blk", "all"]: +show_device_status(virtio_blk_devices, "virtio_blk") + if status_dev in ["net", "all"]: show_device_status(network_devices, "Network", if_field=True) @@ -746,6 +752,7 @@ def do_arg_actions(): if b_flag is not None: clear_data() # refresh if we have changed anything +get_device_details(virtio_blk_devices) get_device_details(network_devices) get_device_details(baseband_devices) get_device_details(crypto_devices) @@ -769,6 +776,7 @@ def main(): parse_args() check_modules() clear_data() +get_device_details(virtio_blk_devices) get_device_details(network_devices) get_device_details(baseband_devices) get_device_details(crypto_devices) -- 1.8.3.1
[PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device
Set_vring_state op is mandatory, add set_vring_state for blk device. Currently set_vring_state for blk device is not implemented. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 14bc5c8..00e7274 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info { } static int +ifcvf_blk_set_vring_state(int vid, int vring, int state) +{ + RTE_SET_USED(vid); + RTE_SET_USED(vring); + RTE_SET_USED(state); + + return 0; +} + +static int ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features) { @@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info { .get_protocol_features = ifcvf_blk_get_protocol_features, .dev_conf = ifcvf_dev_config, .dev_close = ifcvf_dev_close, - .set_vring_state = NULL, + .set_vring_state = ifcvf_blk_set_vring_state, .migration_done = NULL, .get_vfio_group_fd = ifcvf_get_vfio_group_fd, .get_vfio_device_fd = ifcvf_get_vfio_device_fd, -- 1.8.3.1
[PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connects
Add some log of virtio blk device config space information at VDPA launch before qemu connects. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 28 1 file changed, 28 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 00e7274..ff91e80 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = { struct rte_kvargs *kvlist = NULL; int ret = 0; int16_t device_id; + __u64 capacity = 0; + uint8_t *byte; + uint32_t i; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; @@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = { internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; + + /* cannot read 64-bit register in one attempt, +* so read byte by byte. +*/ + for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) { + byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i; + capacity |= (__u64)*byte << (i * 8); + } + DRV_LOG(INFO, "capacity : %quG", capacity >> 21); + + DRV_LOG(INFO, "size_max : 0x%08x", + internal->hw.blk_cfg->size_max); + DRV_LOG(INFO, "seg_max : 0x%08x", + internal->hw.blk_cfg->seg_max); + DRV_LOG(INFO, "blk_size : 0x%08x", + internal->hw.blk_cfg->blk_size); + DRV_LOG(INFO, "geometry"); + DRV_LOG(INFO, "cylinders: %u", + internal->hw.blk_cfg->geometry.cylinders); + DRV_LOG(INFO, "heads: %u", + internal->hw.blk_cfg->geometry.heads); + DRV_LOG(INFO, "sectors : %u", + internal->hw.blk_cfg->geometry.sectors); + DRV_LOG(INFO, "num_queues: 0x%08x", + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware
In the original code, max_queues is set to IFCVF_MAX_QUEUES. In the new code, max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware num_queues. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index ff91e80..d30c3fd 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = { internal->hw.blk_cfg->geometry.sectors); DRV_LOG(INFO, "num_queues: 0x%08x", internal->hw.blk_cfg->num_queues); + + /* reset max_queue here, to minimum modification */ + internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES, + internal->hw.blk_cfg->num_queues); } list->internal = internal; -- 1.8.3.1
[PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk
Create a thread to poll and relay config space change interrupt. Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to info qemu. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++ 1 file changed, 111 insertions(+) diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index d30c3fd..981cb26 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -53,7 +53,9 @@ struct ifcvf_internal { int vfio_group_fd; int vfio_dev_fd; pthread_t tid; /* thread for notify relay */ + pthread_t intr_tid; /* thread for intr relay */ int epfd; + int csc_fd; int vid; struct rte_vdpa_device *vdev; uint16_t max_queues; @@ -617,6 +619,106 @@ struct rte_vdpa_dev_info { return 0; } +static void +virtio_interrupt_handler(struct ifcvf_internal *internal) +{ + int vid = internal->vid; + int ret; + + ret = rte_vhost_slave_config_change(vid, 1); + if (ret) + DRV_LOG(ERR, "failed to notify the guest about configuration space change."); +} + +static void * +intr_relay(void *arg) +{ + struct ifcvf_internal *internal = (struct ifcvf_internal *)arg; + struct epoll_event csc_event; + struct epoll_event ev; + uint64_t buf; + int nbytes; + int csc_fd, csc_val = 0; + + csc_fd = epoll_create(1); + if (csc_fd < 0) { + DRV_LOG(ERR, "failed to create epoll for config space change."); + return NULL; + } + + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; + ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle); + if (epoll_ctl(csc_fd, EPOLL_CTL_ADD, + rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) { + DRV_LOG(ERR, "epoll add error: %s", strerror(errno)); + return NULL; + } + + internal->csc_fd = csc_fd; + + for (;;) { + csc_val = epoll_wait(csc_fd, &csc_event, 1, -1); + if (csc_val < 0) { + if (errno == EINTR) + continue; + DRV_LOG(ERR, "epoll_wait return fail\n"); + return NULL; + } else if (csc_val == 0) { + continue; + } else { + /* csc_val > 0 */ + nbytes = read(csc_event.data.fd, &buf, 8); + if (nbytes < 0) { + if (errno == EINTR || 
errno == EWOULDBLOCK) + continue; + DRV_LOG(ERR, "Error reading from file descriptor %d: %s\n", + csc_event.data.fd, + strerror(errno)); + return NULL; + } else if (nbytes == 0) { + DRV_LOG(ERR, "Read nothing from file descriptor %d\n", + csc_event.data.fd); + continue; + } else { + virtio_interrupt_handler(internal); + } + } + } + return NULL; +} + +static int +setup_intr_relay(struct ifcvf_internal *internal) +{ + int ret; + + ret = pthread_create(&internal->intr_tid, NULL, intr_relay, + (void *)internal); + if (ret) { + DRV_LOG(ERR, "failed to create notify relay pthread."); + return -1; + } + return 0; +} + +static int +unset_intr_relay(struct ifcvf_internal *internal) +{ + void *status; + + if (internal->intr_tid) { + pthread_cancel(internal->intr_tid); + pthread_join(internal->intr_tid, &status); + } + internal->intr_tid = 0; + + if (internal->csc_fd >= 0) + close(internal->csc_fd); + internal->csc_fd = -1; + + return 0; +} + static int update_datapath(struct ifcvf_internal *internal) { @@ -643,10 +745,16 @@ struct rte_vdpa_dev_info { if (ret) goto err; + ret = setup_intr_relay(internal); + if (ret) + goto err; + rte_atomic32_set(&internal->running, 1); } else if (rte_atomic32_read(&internal->running) && (!rte_atomic32_read(&internal->started) || !rte_atomic32_read(&internal->dev_attached))) { + ret = unset_intr_relay(internal); + ret = unset_notify_relay(internal); if (ret) goto err; @@
[PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
Add is_blk flag to ifcvf_hw, and init is_blk during probe. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.h | 1 + drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 6dd7925..8e602af 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -149,6 +149,7 @@ struct ifcvf_hw { u8 *lm_cfg; struct vring_info vring[IFCVF_MAX_QUEUES * 2]; u8 nr_vring; + u8 is_blk; struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE]; }; diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 981cb26..4eb8f98 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = { if (device_id == VIRTIO_ID_NET) { internal->device_type = IFCVF_NET; + internal->hw.is_blk = IFCVF_NET; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_NET].features; } else if (device_id == VIRTIO_ID_BLOCK) { internal->device_type = IFCVF_BLK; + internal->hw.is_blk = IFCVF_BLK; internal->features = features & ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); internal->features |= dev_info[IFCVF_BLK].features; -- 1.8.3.1
[PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device
1.last_avail_idx is lower 16 bit of the register. 2.address of ring_state register is different between net and blk device. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 36 +--- drivers/vdpa/ifc/base/ifcvf.h | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index 3a69e53..a8a4728 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -216,10 +216,18 @@ &cfg->queue_used_hi); IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size); - *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) = - (u32)hw->vring[i].last_avail_idx | - ((u32)hw->vring[i].last_used_idx << 16); + if (hw->is_blk == IFCVF_BLK) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } else if (hw->is_blk == IFCVF_NET) { + *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4) = + (u32)hw->vring[i].last_avail_idx | + ((u32)hw->vring[i].last_used_idx << 16); + } IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector); if (IFCVF_READ_REG16(&cfg->queue_msix_vector) == @@ -252,9 +260,23 @@ IFCVF_WRITE_REG16(i, &cfg->queue_select); IFCVF_WRITE_REG16(0, &cfg->queue_enable); IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector); - ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET + - (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4); - hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); + + if (hw->is_blk) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + } else if (hw->is_blk == IFCVF_NET) { + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + (i / 2) * IFCVF_LM_CFG_SIZE + + (i % 2) * 4); + } + + if (hw->is_blk == IFCVF_BLK) + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + else if (hw->is_blk == IFCVF_NET) 
+ hw->vring[i].last_avail_idx = (u16)(ring_state >> 16); hw->vring[i].last_used_idx = (u16)(ring_state >> 16); } } diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 8e602af..7367094 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -65,6 +65,7 @@ #define IFCVF_MEDIATED_VRING 0x2000 #define IFCVF_32_BIT_MASK 0xffffffff +#define IFCVF_16_BIT_MASK 0xffff #ifndef VHOST_USER_PROTOCOL_F_CONFIG #define VHOST_USER_PROTOCOL_F_CONFIG 9 -- 1.8.3.1
[PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pauses
When virtio blk device is pause, make sure hardware last_avail_idx and last_used_idx is the same. Signed-off-by: Andy Pei --- drivers/vdpa/ifc/base/ifcvf.c | 2 +- drivers/vdpa/ifc/base/ifcvf.h | 3 +++ drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c index a8a4728..7018048 100644 --- a/drivers/vdpa/ifc/base/ifcvf.c +++ b/drivers/vdpa/ifc/base/ifcvf.c @@ -116,7 +116,7 @@ IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status); } -STATIC void +void ifcvf_reset(struct ifcvf_hw *hw) { ifcvf_set_status(hw, 0); diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h index 7367094..f22d18b 100644 --- a/drivers/vdpa/ifc/base/ifcvf.h +++ b/drivers/vdpa/ifc/base/ifcvf.h @@ -157,6 +157,9 @@ struct ifcvf_hw { int ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev); +void +ifcvf_reset(struct ifcvf_hw *hw); + u64 ifcvf_get_features(struct ifcvf_hw *hw); diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4eb8f98..b0b2859 100644 --- a/drivers/vdpa/ifc/ifcvf_vdpa.c +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c @@ -356,23 +356,32 @@ struct rte_vdpa_dev_info { vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) { struct ifcvf_hw *hw = &internal->hw; - struct rte_vhost_vring vq; int i, vid; uint64_t features = 0; uint64_t log_base = 0, log_size = 0; uint64_t len; + u32 ring_state = 0; vid = internal->vid; if (internal->device_type == IFCVF_BLK) { for (i = 0; i < hw->nr_vring; i++) { - rte_vhost_get_vhost_vring(internal->vid, i, &vq); - while (vq.avail->idx != vq.used->idx) { - ifcvf_notify_queue(hw, i); - usleep(10); - } - hw->vring[i].last_avail_idx = vq.avail->idx; - hw->vring[i].last_used_idx = vq.used->idx; + do { + if (hw->lm_cfg != NULL) + ring_state = *(u32 *)(hw->lm_cfg + + IFCVF_LM_RING_STATE_OFFSET + + i * IFCVF_LM_CFG_SIZE); + hw->vring[i].last_avail_idx = + (u16)(ring_state & IFCVF_16_BIT_MASK); + 
hw->vring[i].last_used_idx = + (u16)(ring_state >> 16); + if (hw->vring[i].last_avail_idx != + hw->vring[i].last_used_idx) { + ifcvf_notify_queue(hw, i); + usleep(10); + } + } while (hw->vring[i].last_avail_idx != + hw->vring[i].last_used_idx); } } @@ -759,7 +768,12 @@ struct rte_vdpa_dev_info { if (ret) goto err; - vdpa_ifcvf_stop(internal); + if (internal->device_type == IFCVF_BLK) { + vdpa_ifcvf_blk_pause(internal); + ifcvf_reset(&internal->hw); + } else { + vdpa_ifcvf_stop(internal); + } ret = vdpa_disable_vfio_intr(internal); if (ret) -- 1.8.3.1
[PATCH v2 15/15] vhost: make sure each queue callfd is configured
During the vhost data path building process, qemu creates a call fd at first, and creates another call fd at the end. The final call fd is the one used to relay notifications. In the original code, dev_conf is performed right after the kick fd is set, so it configures the first call fd. Even though the actual call fd is set later, the data path does not work correctly. Signed-off-by: Andy Pei --- lib/vhost/vhost_user.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 5eb1dd6..b25b25f 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct virtio_net **pdev, if (!vdpa_dev) goto out; + if (request != VHOST_USER_SET_VRING_CALL) + goto out; + if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) { if (vdpa_dev->ops->dev_conf(dev->vid)) VHOST_LOG_CONFIG(ERR, "Failed to configure vDPA device\n"); else dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; + } else { + /* when VIRTIO_DEV_VDPA_CONFIGURED already configured +* close the device and config the device again, +* make sure the call fd of each queue is configured correctly. +*/ + if (vdpa_dev->ops->dev_close(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to close vDPA device\n"); + if (vdpa_dev->ops->dev_conf(dev->vid)) + VHOST_LOG_CONFIG(ERR, +"Failed to re-config vDPA device\n"); } out: -- 1.8.3.1
[PATCH v3 00/15] add virtio_blk device support to vdpa/ifc
This patch set adds virtio_blk device support to the vdpa/ifc driver. Because the net and blk devices have a lot in common, part of the existing vdpa/ifc driver is re-used. The virtio net and blk devices are distinguished by device id, and device-specific features and ops are implemented for each. An example is added to the vdpa example application to support virtio_blk devices. To support blk device live migration, some modifications are made to the vhost lib: the dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL msg. v3: Fix some compile issues. v2: Fix some coding style issues. Andy Pei (15): vdpa/ifc: add support for virtio blk device vhost: add vdpa ops for blk device vdpa/ifc: add blk ops for ifc device vdpa/ifc: add vdpa interrupt for blk device vdpa/ifc: add blk dev sw live migration example/vdpa:add vdpa blk support in example usertools: add support for virtio blk device vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device vdpa/ifc: add some log at VDPA launch before qemu connect vdpa/ifc: read virtio max_queues from hardware vdpa: add config space change interrupt register and handle for virtio_blk vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe vdpa/ifc/base: for blk device, live migration register is different from net device vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the same when blk device pause vhost: make sure each queue callfd is configured drivers/vdpa/ifc/base/ifcvf.c| 42 +++- drivers/vdpa/ifc/base/ifcvf.h| 29 ++- drivers/vdpa/ifc/ifcvf_vdpa.c| 520 --- examples/vdpa/Makefile | 2 +- examples/vdpa/main.c | 8 + examples/vdpa/meson.build| 1 + examples/vdpa/vdpa_blk_compact.c | 150 +++ examples/vdpa/vdpa_blk_compact.h | 117 + examples/vdpa/vhost_user.h | 189 ++ lib/vhost/vdpa_driver.h | 8 +- lib/vhost/vhost_user.c | 14 ++ usertools/dpdk-devbind.py| 8 + 12 files changed, 1034 insertions(+), 54 deletions(-) create mode 100644 examples/vdpa/vdpa_blk_compact.c create mode 100644 examples/vdpa/vdpa_blk_compact.h create mode 100644 examples/vdpa/vhost_user.h -- 1.8.3.1