[PATCH v5 0/8] vdpa/ifc: add multi queue support

2022-10-17 Thread Andy Pei
v5:
 fix some commit message.
 rework some code logic.

v4:
 fix some commit message.
 add some comments to code.
 fix some code to reduce confusion.

v3:
 rename device ID macro name.
 fix some patch title and commit message.
 delete some unused macros.
 rework some code logic.

v2:
 fix some coding style issues.
 support dynamic enable/disable queue at run time.

Andy Pei (6):
  vdpa/ifc: add multi-queue support
  vdpa/ifc: set max queues based on virtio spec
  vdpa/ifc: write queue count to MQ register
  vdpa/ifc: only configure enabled queue
  vhost: vDPA blk device gets ready when the first queue is ready
  vhost: improve vDPA blk device configure condition

Huang Wei (2):
  vdpa/ifc: add new device ID for legacy network device
  vdpa/ifc: support dynamic enable/disable queue

 drivers/vdpa/ifc/base/ifcvf.c | 144 ++
 drivers/vdpa/ifc/base/ifcvf.h |  16 -
 drivers/vdpa/ifc/ifcvf_vdpa.c | 142 +++--
 lib/vhost/vhost_user.c|  44 +++--
 4 files changed, 315 insertions(+), 31 deletions(-)

-- 
1.8.3.1



[PATCH v5 1/8] vdpa/ifc: add new device ID for legacy network device

2022-10-17 Thread Andy Pei
From: Huang Wei 

Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000).
Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID".

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  6 --
 drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 9d95aac..ef7697a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -12,12 +12,14 @@
 #define IFCVF_BLK  1
 
 #define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_NET_DEVICE_ID 0x1041
+#define IFCVF_NET_MODERN_DEVICE_ID  0x1041
 #define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000
 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
 #define IFCVF_SUBSYS_VENDOR_ID  0x8086
 #define IFCVF_SUBSYS_DEVICE_ID  0x001A
-#define IFCVF_BLK_DEVICE_ID 0x0002
+#define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
+#define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
 #define IFCVF_MAX_QUEUES   1
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d5ac583..b4389a0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
- .device_id = IFCVF_NET_DEVICE_ID,
+ .device_id = IFCVF_NET_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
+ .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
+ .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
+   },
+
+   { .class_id = RTE_CLASS_ANY_ID,
+ .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .vendor_id = 0, /* sentinel */
-- 
1.8.3.1



[PATCH v5 2/8] vdpa/ifc: add multi-queue support

2022-10-17 Thread Andy Pei
Enable VHOST_USER_PROTOCOL_F_MQ feature.
Expose IFCVF_MQ_OFFSET register to enable multi-queue.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 drivers/vdpa/ifc/base/ifcvf.h | 2 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index f1e1474..81c68c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -90,6 +90,15 @@
if (!hw->lm_cfg)
WARNINGOUT("HW support live migration not support!\n");
 
+   /* For some hardware implementation, for example:
+* the BAR 4 of PF is NULL, while BAR 4 of VF is not.
+* This code makes sure hw->mq_cfg is a valid address.
+*/
+   if (hw->mem_resource[4].addr)
+   hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET;
+   else
+   hw->mq_cfg = NULL;
+
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
DEBUGOUT("capability incomplete\n");
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index ef7697a..d16d9ab 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -50,6 +50,7 @@
 
 #define IFCVF_LM_CFG_SIZE  0x40
 #define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_MQ_OFFSET0x28
 
 #define IFCVF_LM_LOGGING_CTRL  0x0
 
@@ -149,6 +150,7 @@ struct ifcvf_hw {
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
u8 *lm_cfg;
+   u8 *mq_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
int device_type;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b4389a0..008cf89 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+1ULL << VHOST_USER_PROTOCOL_F_MQ | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VDPA_BLK_PROTOCOL_FEATURES \
-- 
1.8.3.1



[PATCH v5 3/8] vdpa/ifc: set max queues based on virtio spec

2022-10-17 Thread Andy Pei
Set max_queues according to virtio spec.
For virtio BLK device, set max_queues to the value of num_queues
in struct virtio_blk_config.
For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg,
get the queue pair number using num_queues and set max_queues to it.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.h |  2 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index d16d9ab..1e133c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -21,7 +21,7 @@
 #define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
 #define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
-#define IFCVF_MAX_QUEUES   1
+#define IFCVF_MAX_QUEUES   32
 
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 #define VIRTIO_F_IOMMU_PLATFORM33
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 008cf89..5a24204 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -26,6 +26,12 @@
 
 #include "base/ifcvf.h"
 
+/*
+ * RTE_MIN() cannot be used since braced-group within expression allowed
+ * only inside a function.
+ */
+#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2))
+
 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
@@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = {
uint64_t capacity = 0;
uint8_t *byte;
uint32_t i;
+   uint16_t queue_pairs;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = {
}
 
internal->configured = 0;
-   internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
 
device_id = ifcvf_pci_get_device_type(pci_dev);
@@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->hw.device_type = IFCVF_NET;
+   /*
+* ifc device always has CTRL_VQ,
+* and supports VIRTIO_NET_F_CTRL_VQ feature.
+*/
+   queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
+   DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
+   queue_pairs);
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
@@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(DEBUG, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v5 4/8] vdpa/ifc: write queue count to MQ register

2022-10-17 Thread Andy Pei
Write queue count to IFCVF_MQ_OFFSET register
to enable multi-queue feature.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 81c68c0..b377126 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -202,6 +202,37 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
+STATIC void
+ifcvf_enable_mq(struct ifcvf_hw *hw)
+{
+   u8 *mq_cfg;
+   u8 qid;
+   int nr_queue = 0;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
+   nr_queue++;
+   }
+
+   if (nr_queue == 0) {
+   WARNINGOUT("no enabled vring\n");
+   return;
+   }
+
+   mq_cfg = hw->mq_cfg;
+   if (mq_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   *(u32 *)mq_cfg = nr_queue;
+   RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue);
+   } else {
+   *(u32 *)mq_cfg = nr_queue / 2;
+   RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n",
+   nr_queue / 2);
+   }
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
@@ -219,6 +250,7 @@
return -1;
}
 
+   ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-- 
1.8.3.1



[PATCH v5 6/8] vdpa/ifc: support dynamic enable/disable queue

2022-10-17 Thread Andy Pei
From: Huang Wei 

Support dynamically enabling or disabling queues at run time.
With a front end such as QEMU, the user can use ethtool to configure the queues.
For example, "ethtool -L eth0 combined 3" enables 3 queue pairs.

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 100 ++
 drivers/vdpa/ifc/base/ifcvf.h |   6 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c |  93 ---
 3 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 30bb8cb..869ddd6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -233,6 +233,106 @@
}
 }
 
+int
+ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u8 *lm_cfg;
+   u16 notify_off;
+   int msix_vector;
+
+   if (i >= (int)hw->nr_vring)
+   return -1;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return -1;
+   }
+
+   ifcvf_enable_mq(hw);
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector != (i + 1)) {
+   IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector == IFCVF_MSI_NO_VECTOR) {
+   RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n",
+   i);
+   return -1;
+   }
+   }
+
+   io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+   &cfg->queue_desc_hi);
+   io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+   &cfg->queue_avail_hi);
+   io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+   &cfg->queue_used_hi);
+   IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
+
+   notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
+   hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+   notify_off * hw->notify_off_multiplier);
+   IFCVF_WRITE_REG16(1, &cfg->queue_enable);
+
+   return 0;
+}
+
+void
+ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u32 ring_state;
+   u8 *lm_cfg;
+
+   if (i >= (int)hw->nr_vring)
+   return;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return;
+   }
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   IFCVF_WRITE_REG16(0, &cfg->queue_enable);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   } else {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+   }
+   hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 1e133c0..3726da7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -164,6 +164,12 @@ struct ifcvf_hw {
 ifcvf_get_features(struct ifcvf_hw *hw);
 
 int
+ifcvf_en

[PATCH v5 5/8] vdpa/ifc: only configure enabled queue

2022-10-17 Thread Andy Pei
When configuring the hardware queue, we only configure queues which
have been enabled by vhost.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index b377126..30bb8cb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -252,6 +252,9 @@
 
ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a24204..0c3407a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -284,6 +284,8 @@ struct rte_vdpa_dev_info {
rte_vhost_get_negotiated_features(vid, &hw->req_features);
 
for (i = 0; i < nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
rte_vhost_get_vhost_vring(vid, i, &vq);
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
if (gpa == 0) {
@@ -499,6 +501,8 @@ struct rte_vdpa_dev_info {
 
vring.kickfd = -1;
for (qid = 0; qid < q_num; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
@@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info {
struct rte_vdpa_device *vdev;
struct internal_list *list;
struct ifcvf_internal *internal;
+   struct ifcvf_hw *hw;
+   uint16_t i;
 
vdev = rte_vhost_get_vdpa_device(vid);
list = find_internal_resource_by_vdev(vdev);
@@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info {
rte_atomic32_set(&internal->dev_attached, 1);
update_datapath(internal);
 
-   if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
-   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+   hw = &internal->hw;
+   for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+   if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
+   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
vdev->device->name);
+   }
 
internal->configured = 1;
+   DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
return 0;
 }
 
-- 
1.8.3.1



[PATCH v5 7/8] vhost: vDPA blk device gets ready when the first queue is ready

2022-10-17 Thread Andy Pei
When booting from a virtio blk device, SeaBIOS in QEMU only enables one queue.
To work in this scenario, the vDPA BLK device back-end configures the device
as soon as the first queue is ready.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
---
 lib/vhost/vhost_user.c | 31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index cd65257..f5206dd 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1441,11 +1441,15 @@
 }
 
 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u
+#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u
 
 static int
 virtio_is_ready(struct virtio_net *dev)
 {
+   struct rte_vdpa_device *vdpa_dev;
struct vhost_virtqueue *vq;
+   uint32_t vdpa_type;
+   int ret = 0;
uint32_t i, nr_vring = dev->nr_vring;
 
if (dev->flags & VIRTIO_DEV_READY)
@@ -1454,13 +1458,32 @@
if (!dev->nr_vring)
return 0;
 
-   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) {
-   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev) {
+   if (vdpa_dev->ops->get_dev_type) {
+   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+   if (ret) {
+   VHOST_LOG_CONFIG(dev->ifname, ERR,
+   "failed to get vdpa dev type.\n");
+   return -1;
+   }
+   } else {
+   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+   } else {
+   vdpa_type = -1;
+   }
 
-   if (dev->nr_vring < nr_vring)
-   return 0;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY;
+   } else {
+   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET)
+   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
}
 
+   if (dev->nr_vring < nr_vring)
+   return 0;
+
for (i = 0; i < nr_vring; i++) {
vq = dev->virtqueue[i];
 
-- 
1.8.3.1



[PATCH v5 8/8] vhost: improve vDPA blk device configure condition

2022-10-17 Thread Andy Pei
To support multi-queue, configure the device only
after the call fds of all queues have been set.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 lib/vhost/vhost_user.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index f5206dd..6b5f89a 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2984,6 +2984,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
uint32_t vdpa_type = 0;
uint32_t request;
uint32_t i;
+   uint16_t blk_call_fd;
 
dev = get_device(vid);
if (dev == NULL)
@@ -3203,9 +3204,15 @@ static int is_vring_iotlb(struct virtio_net *dev,
} else {
vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
}
-   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
-   && request != VHOST_USER_SET_VRING_CALL)
-   goto out;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   if (request == VHOST_USER_SET_VRING_CALL) {
+   blk_call_fd = ctx.msg.payload.u64 & 
VHOST_USER_VRING_IDX_MASK;
+   if (blk_call_fd != dev->nr_vring - 1)
+   goto out;
+   } else {
+   goto out;
+   }
+   }
 
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
-- 
1.8.3.1



[PATCH v6 0/8] vdpa/ifc: add multi queue support

2022-10-17 Thread Andy Pei
v6:
 Add vdpa_device_type to rte_vdpa_device to store vDPA device type.

v5:
 fix some commit message.
 rework some code logic.

v4:
 fix some commit message.
 add some comments to code.
 fix some code to reduce confusion.

v3:
 rename device ID macro name.
 fix some patch title and commit message.
 delete some unused macros.
 rework some code logic.

v2:
 fix some coding style issues.
 support dynamic enable/disable queue at run time.

Andy Pei (6):
  vdpa/ifc: add multi-queue support
  vdpa/ifc: set max queues based on virtio spec
  vdpa/ifc: write queue count to MQ register
  vdpa/ifc: only configure enabled queue
  vhost: vDPA blk device gets ready when the first queue is ready
  vhost: improve vDPA blk device configure condition

Huang Wei (2):
  vdpa/ifc: add new device ID for legacy network device
  vdpa/ifc: support dynamic enable/disable queue

 drivers/vdpa/ifc/base/ifcvf.c | 144 ++
 drivers/vdpa/ifc/base/ifcvf.h |  16 -
 drivers/vdpa/ifc/ifcvf_vdpa.c | 142 +++--
 lib/vhost/vdpa_driver.h   |   2 +
 lib/vhost/vhost_user.c|  62 +-
 5 files changed, 325 insertions(+), 41 deletions(-)

-- 
1.8.3.1



[PATCH v6 1/8] vdpa/ifc: add new device ID for legacy network device

2022-10-17 Thread Andy Pei
From: Huang Wei 

Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000).
Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID".

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  6 --
 drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 9d95aac..ef7697a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -12,12 +12,14 @@
 #define IFCVF_BLK  1
 
 #define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_NET_DEVICE_ID 0x1041
+#define IFCVF_NET_MODERN_DEVICE_ID  0x1041
 #define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000
 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
 #define IFCVF_SUBSYS_VENDOR_ID  0x8086
 #define IFCVF_SUBSYS_DEVICE_ID  0x001A
-#define IFCVF_BLK_DEVICE_ID 0x0002
+#define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
+#define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
 #define IFCVF_MAX_QUEUES   1
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d5ac583..b4389a0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
- .device_id = IFCVF_NET_DEVICE_ID,
+ .device_id = IFCVF_NET_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
+ .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
+ .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
+   },
+
+   { .class_id = RTE_CLASS_ANY_ID,
+ .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .vendor_id = 0, /* sentinel */
-- 
1.8.3.1



[PATCH v6 2/8] vdpa/ifc: add multi-queue support

2022-10-17 Thread Andy Pei
Enable VHOST_USER_PROTOCOL_F_MQ feature.
Expose IFCVF_MQ_OFFSET register to enable multi-queue.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 drivers/vdpa/ifc/base/ifcvf.h | 2 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index f1e1474..81c68c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -90,6 +90,15 @@
if (!hw->lm_cfg)
WARNINGOUT("HW support live migration not support!\n");
 
+   /* For some hardware implementation, for example:
+* the BAR 4 of PF is NULL, while BAR 4 of VF is not.
+* This code makes sure hw->mq_cfg is a valid address.
+*/
+   if (hw->mem_resource[4].addr)
+   hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET;
+   else
+   hw->mq_cfg = NULL;
+
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
DEBUGOUT("capability incomplete\n");
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index ef7697a..d16d9ab 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -50,6 +50,7 @@
 
 #define IFCVF_LM_CFG_SIZE  0x40
 #define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_MQ_OFFSET0x28
 
 #define IFCVF_LM_LOGGING_CTRL  0x0
 
@@ -149,6 +150,7 @@ struct ifcvf_hw {
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
u8 *lm_cfg;
+   u8 *mq_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
int device_type;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b4389a0..008cf89 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+1ULL << VHOST_USER_PROTOCOL_F_MQ | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VDPA_BLK_PROTOCOL_FEATURES \
-- 
1.8.3.1



[PATCH v6 3/8] vdpa/ifc: set max queues based on virtio spec

2022-10-17 Thread Andy Pei
Set max_queues according to virtio spec.
For virtio BLK device, set max_queues to the value of num_queues
in struct virtio_blk_config.
For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg,
get the queue pair number using num_queues and set max_queues to it.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.h |  2 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index d16d9ab..1e133c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -21,7 +21,7 @@
 #define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
 #define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
-#define IFCVF_MAX_QUEUES   1
+#define IFCVF_MAX_QUEUES   32
 
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 #define VIRTIO_F_IOMMU_PLATFORM33
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 008cf89..5a24204 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -26,6 +26,12 @@
 
 #include "base/ifcvf.h"
 
+/*
+ * RTE_MIN() cannot be used since braced-group within expression allowed
+ * only inside a function.
+ */
+#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2))
+
 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
@@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = {
uint64_t capacity = 0;
uint8_t *byte;
uint32_t i;
+   uint16_t queue_pairs;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = {
}
 
internal->configured = 0;
-   internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
 
device_id = ifcvf_pci_get_device_type(pci_dev);
@@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->hw.device_type = IFCVF_NET;
+   /*
+* ifc device always has CTRL_VQ,
+* and supports VIRTIO_NET_F_CTRL_VQ feature.
+*/
+   queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
+   DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
+   queue_pairs);
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
@@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(DEBUG, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v6 4/8] vdpa/ifc: write queue count to MQ register

2022-10-17 Thread Andy Pei
Write queue count to IFCVF_MQ_OFFSET register
to enable multi-queue feature.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 81c68c0..b377126 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -202,6 +202,37 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
+STATIC void
+ifcvf_enable_mq(struct ifcvf_hw *hw)
+{
+   u8 *mq_cfg;
+   u8 qid;
+   int nr_queue = 0;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
+   nr_queue++;
+   }
+
+   if (nr_queue == 0) {
+   WARNINGOUT("no enabled vring\n");
+   return;
+   }
+
+   mq_cfg = hw->mq_cfg;
+   if (mq_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   *(u32 *)mq_cfg = nr_queue;
+   RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue);
+   } else {
+   *(u32 *)mq_cfg = nr_queue / 2;
+   RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n",
+   nr_queue / 2);
+   }
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
@@ -219,6 +250,7 @@
return -1;
}
 
+   ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-- 
1.8.3.1



[PATCH v6 5/8] vdpa/ifc: only configure enabled queue

2022-10-17 Thread Andy Pei
When configuring the hardware queue, we only configure queues which
have been enabled by vhost.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index b377126..30bb8cb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -252,6 +252,9 @@
 
ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a24204..0c3407a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -284,6 +284,8 @@ struct rte_vdpa_dev_info {
rte_vhost_get_negotiated_features(vid, &hw->req_features);
 
for (i = 0; i < nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
rte_vhost_get_vhost_vring(vid, i, &vq);
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
if (gpa == 0) {
@@ -499,6 +501,8 @@ struct rte_vdpa_dev_info {
 
vring.kickfd = -1;
for (qid = 0; qid < q_num; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
@@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info {
struct rte_vdpa_device *vdev;
struct internal_list *list;
struct ifcvf_internal *internal;
+   struct ifcvf_hw *hw;
+   uint16_t i;
 
vdev = rte_vhost_get_vdpa_device(vid);
list = find_internal_resource_by_vdev(vdev);
@@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info {
rte_atomic32_set(&internal->dev_attached, 1);
update_datapath(internal);
 
-   if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
-   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+   hw = &internal->hw;
+   for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+   if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
+   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
vdev->device->name);
+   }
 
internal->configured = 1;
+   DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
return 0;
 }
 
-- 
1.8.3.1



[PATCH v6 7/8] vhost: vDPA blk device gets ready when the first queue is ready

2022-10-17 Thread Andy Pei
When booting from a virtio blk device, SeaBIOS in QEMU enables only one queue.
To work in this scenario, the vDPA BLK device back-end configures the device
when any queue is ready.
Add vdpa_device_type to rte_vdpa_device to store vDPA device type.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
---
 lib/vhost/vdpa_driver.h |  2 ++
 lib/vhost/vhost_user.c  | 55 ++---
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8b88a53..c4ec222 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -92,6 +92,8 @@ struct rte_vdpa_device {
struct rte_device *device;
/** vdpa device operations */
struct rte_vdpa_dev_ops *ops;
+   /** vdpa device type: net, blk... */
+   uint32_t vdpa_device_type;
 };
 
 /**
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index cd65257..53806fa 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1441,11 +1441,14 @@
 }
 
 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u
+#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u
 
 static int
 virtio_is_ready(struct virtio_net *dev)
 {
+   struct rte_vdpa_device *vdpa_dev;
struct vhost_virtqueue *vq;
+   uint32_t vdpa_type;
uint32_t i, nr_vring = dev->nr_vring;
 
if (dev->flags & VIRTIO_DEV_READY)
@@ -1454,13 +1457,22 @@
if (!dev->nr_vring)
return 0;
 
-   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) {
-   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev)
+   vdpa_type = vdpa_dev->vdpa_device_type;
+   else
+   vdpa_type = -1;
 
-   if (dev->nr_vring < nr_vring)
-   return 0;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY;
+   } else {
+   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET)
+   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
}
 
+   if (dev->nr_vring < nr_vring)
+   return 0;
+
for (i = 0; i < nr_vring; i++) {
vq = dev->virtqueue[i];
 
@@ -2958,7 +2970,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
int ret;
int unlock_required = 0;
bool handled;
-   uint32_t vdpa_type = 0;
+   uint32_t vdpa_type = -1;
uint32_t request;
uint32_t i;
 
@@ -3152,7 +3164,27 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (unlock_required)
vhost_user_unlock_all_queue_pairs(dev);
 
-   if (ret != 0 || !virtio_is_ready(dev))
+   if (ret != 0)
+   goto out;
+
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev) {
+   if (vdpa_dev->ops->get_dev_type) {
+   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+   if (ret) {
+   VHOST_LOG_CONFIG(dev->ifname, ERR,
+   "failed to get vdpa dev type.\n");
+   ret = -1;
+   goto out;
+   }
+   } else {
+   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+
+   vdpa_dev->vdpa_device_type = vdpa_type;
+   }
+
+   if (!virtio_is_ready(dev))
goto out;
 
/*
@@ -3166,20 +3198,9 @@ static int is_vring_iotlb(struct virtio_net *dev,
dev->flags |= VIRTIO_DEV_RUNNING;
}
 
-   vdpa_dev = dev->vdpa_dev;
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa 
dev type.\n");
-   ret = -1;
-   goto out;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
&& request != VHOST_USER_SET_VRING_CALL)
goto out;
-- 
1.8.3.1



[PATCH v6 6/8] vdpa/ifc: support dynamic enable/disable queue

2022-10-17 Thread Andy Pei
From: Huang Wei 

Support dynamically enabling or disabling queues.
With a front end such as QEMU, the user can use ethtool to configure queues.
For example, "ethtool -L eth0 combined 3" enables 3 queue pairs.

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 100 ++
 drivers/vdpa/ifc/base/ifcvf.h |   6 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c |  93 ---
 3 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 30bb8cb..869ddd6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -233,6 +233,106 @@
}
 }
 
+int
+ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u8 *lm_cfg;
+   u16 notify_off;
+   int msix_vector;
+
+   if (i >= (int)hw->nr_vring)
+   return -1;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return -1;
+   }
+
+   ifcvf_enable_mq(hw);
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector != (i + 1)) {
+   IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector == IFCVF_MSI_NO_VECTOR) {
+   RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n",
+   i);
+   return -1;
+   }
+   }
+
+   io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+   &cfg->queue_desc_hi);
+   io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+   &cfg->queue_avail_hi);
+   io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+   &cfg->queue_used_hi);
+   IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
+
+   notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
+   hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+   notify_off * hw->notify_off_multiplier);
+   IFCVF_WRITE_REG16(1, &cfg->queue_enable);
+
+   return 0;
+}
+
+void
+ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u32 ring_state;
+   u8 *lm_cfg;
+
+   if (i >= (int)hw->nr_vring)
+   return;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return;
+   }
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   IFCVF_WRITE_REG16(0, &cfg->queue_enable);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   } else {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+   }
+   hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 1e133c0..3726da7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -164,6 +164,12 @@ struct ifcvf_hw {
 ifcvf_get_features(struct ifcvf_hw *hw);
 
 int
+ifcvf_en

[PATCH v6 8/8] vhost: improve vDPA blk device configure condition

2022-10-17 Thread Andy Pei
To support multi-queue, configure the device
after the call fds of all queues are set.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 lib/vhost/vhost_user.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 53806fa..2c50d13 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2973,6 +2973,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
uint32_t vdpa_type = -1;
uint32_t request;
uint32_t i;
+   uint16_t blk_call_fd;
 
dev = get_device(vid);
if (dev == NULL)
@@ -3201,9 +3202,15 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
-   && request != VHOST_USER_SET_VRING_CALL)
-   goto out;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   if (request == VHOST_USER_SET_VRING_CALL) {
+   blk_call_fd = ctx.msg.payload.u64 & 
VHOST_USER_VRING_IDX_MASK;
+   if (blk_call_fd != dev->nr_vring - 1)
+   goto out;
+   } else {
+   goto out;
+   }
+   }
 
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
-- 
1.8.3.1



[PATCH v7 00/12] vdpa/ifc: add multi queue support

2022-10-17 Thread Andy Pei
v7:
 Fill vdpa_device_type in vdpa device registration.

v6:
 Add vdpa_device_type to rte_vdpa_device to store vDPA device type.

v5:
 fix some commit message.
 rework some code logic.

v4:
 fix some commit message.
 add some comments to code.
 fix some code to reduce confusion.

v3:
 rename device ID macro name.
 fix some patch title and commit message.
 delete some used macros.
 rework some code logic.

v2:
 fix some coding style issue.
 support dynamic enable/disable queue at run time.

Andy Pei (10):
  vdpa/ifc: add multi-queue support
  vdpa/ifc: set max queues based on virtio spec
  vdpa/ifc: write queue count to MQ register
  vdpa/ifc: only configure enabled queue
  vdpa/ifc: change internal function name
  vdpa/ifc: add internal API to get device.
  vdpa/ifc: change some driver logic
  vhost: add vdpa device type to rte vdpa device
  vhost: vDPA blk device gets ready when the first queue is ready
  vhost: improve vDPA blk device configure condition

Huang Wei (2):
  vdpa/ifc: add new device ID for legacy network device
  vdpa/ifc: support dynamic enable/disable queue

 drivers/vdpa/ifc/base/ifcvf.c | 144 
 drivers/vdpa/ifc/base/ifcvf.h |  16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++---
 lib/vhost/socket.c|  15 +---
 lib/vhost/vdpa.c  |  17 
 lib/vhost/vdpa_driver.h   |   2 +
 lib/vhost/vhost_user.c|  40 +
 7 files changed, 358 insertions(+), 61 deletions(-)

-- 
1.8.3.1



[PATCH v7 01/12] vdpa/ifc: add new device ID for legacy network device

2022-10-17 Thread Andy Pei
From: Huang Wei 

Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000).
Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID".

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  6 --
 drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 9d95aac..ef7697a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -12,12 +12,14 @@
 #define IFCVF_BLK  1
 
 #define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_NET_DEVICE_ID 0x1041
+#define IFCVF_NET_MODERN_DEVICE_ID  0x1041
 #define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000
 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
 #define IFCVF_SUBSYS_VENDOR_ID  0x8086
 #define IFCVF_SUBSYS_DEVICE_ID  0x001A
-#define IFCVF_BLK_DEVICE_ID 0x0002
+#define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
+#define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
 #define IFCVF_MAX_QUEUES   1
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d5ac583..b4389a0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
- .device_id = IFCVF_NET_DEVICE_ID,
+ .device_id = IFCVF_NET_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
+ .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
+ .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
+   },
+
+   { .class_id = RTE_CLASS_ANY_ID,
+ .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .vendor_id = 0, /* sentinel */
-- 
1.8.3.1



[PATCH v7 02/12] vdpa/ifc: add multi-queue support

2022-10-17 Thread Andy Pei
Enable VHOST_USER_PROTOCOL_F_MQ feature.
Expose IFCVF_MQ_OFFSET register to enable multi-queue.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 drivers/vdpa/ifc/base/ifcvf.h | 2 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index f1e1474..81c68c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -90,6 +90,15 @@
if (!hw->lm_cfg)
WARNINGOUT("HW support live migration not support!\n");
 
+   /* For some hardware implementation, for example:
+* the BAR 4 of PF is NULL, while BAR 4 of VF is not.
+* This code makes sure hw->mq_cfg is a valid address.
+*/
+   if (hw->mem_resource[4].addr)
+   hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET;
+   else
+   hw->mq_cfg = NULL;
+
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
DEBUGOUT("capability incomplete\n");
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index ef7697a..d16d9ab 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -50,6 +50,7 @@
 
 #define IFCVF_LM_CFG_SIZE  0x40
 #define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_MQ_OFFSET0x28
 
 #define IFCVF_LM_LOGGING_CTRL  0x0
 
@@ -149,6 +150,7 @@ struct ifcvf_hw {
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
u8 *lm_cfg;
+   u8 *mq_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
int device_type;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b4389a0..008cf89 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+1ULL << VHOST_USER_PROTOCOL_F_MQ | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VDPA_BLK_PROTOCOL_FEATURES \
-- 
1.8.3.1



[PATCH v7 03/12] vdpa/ifc: set max queues based on virtio spec

2022-10-17 Thread Andy Pei
Set max_queues according to virtio spec.
For virtio BLK device, set max_queues to the value of num_queues
in struct virtio_blk_config.
For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg,
get the queue pair number using num_queues and set max_queues to it.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.h |  2 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index d16d9ab..1e133c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -21,7 +21,7 @@
 #define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
 #define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
-#define IFCVF_MAX_QUEUES   1
+#define IFCVF_MAX_QUEUES   32
 
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 #define VIRTIO_F_IOMMU_PLATFORM33
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 008cf89..5a24204 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -26,6 +26,12 @@
 
 #include "base/ifcvf.h"
 
+/*
+ * RTE_MIN() cannot be used since braced-group within expression allowed
+ * only inside a function.
+ */
+#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2))
+
 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
@@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = {
uint64_t capacity = 0;
uint8_t *byte;
uint32_t i;
+   uint16_t queue_pairs;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = {
}
 
internal->configured = 0;
-   internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
 
device_id = ifcvf_pci_get_device_type(pci_dev);
@@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->hw.device_type = IFCVF_NET;
+   /*
+* ifc device always has CTRL_VQ,
+* and supports VIRTIO_NET_F_CTRL_VQ feature.
+*/
+   queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
+   DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
+   queue_pairs);
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
@@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(DEBUG, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v7 04/12] vdpa/ifc: write queue count to MQ register

2022-10-17 Thread Andy Pei
Write queue count to IFCVF_MQ_OFFSET register
to enable multi-queue feature.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 81c68c0..b377126 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -202,6 +202,37 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
+STATIC void
+ifcvf_enable_mq(struct ifcvf_hw *hw)
+{
+   u8 *mq_cfg;
+   u8 qid;
+   int nr_queue = 0;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
+   nr_queue++;
+   }
+
+   if (nr_queue == 0) {
+   WARNINGOUT("no enabled vring\n");
+   return;
+   }
+
+   mq_cfg = hw->mq_cfg;
+   if (mq_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   *(u32 *)mq_cfg = nr_queue;
+   RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue);
+   } else {
+   *(u32 *)mq_cfg = nr_queue / 2;
+   RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n",
+   nr_queue / 2);
+   }
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
@@ -219,6 +250,7 @@
return -1;
}
 
+   ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-- 
1.8.3.1



[PATCH v7 05/12] vdpa/ifc: only configure enabled queue

2022-10-17 Thread Andy Pei
When configuring the hardware queue, we only configure queues which
have been enabled by vhost.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index b377126..30bb8cb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -252,6 +252,9 @@
 
ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a24204..0c3407a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -284,6 +284,8 @@ struct rte_vdpa_dev_info {
rte_vhost_get_negotiated_features(vid, &hw->req_features);
 
for (i = 0; i < nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
rte_vhost_get_vhost_vring(vid, i, &vq);
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
if (gpa == 0) {
@@ -499,6 +501,8 @@ struct rte_vdpa_dev_info {
 
vring.kickfd = -1;
for (qid = 0; qid < q_num; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
@@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info {
struct rte_vdpa_device *vdev;
struct internal_list *list;
struct ifcvf_internal *internal;
+   struct ifcvf_hw *hw;
+   uint16_t i;
 
vdev = rte_vhost_get_vdpa_device(vid);
list = find_internal_resource_by_vdev(vdev);
@@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info {
rte_atomic32_set(&internal->dev_attached, 1);
update_datapath(internal);
 
-   if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
-   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+   hw = &internal->hw;
+   for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+   if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
+   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
vdev->device->name);
+   }
 
internal->configured = 1;
+   DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
return 0;
 }
 
-- 
1.8.3.1



[PATCH v7 06/12] vdpa/ifc: support dynamic enable/disable queue

2022-10-17 Thread Andy Pei
From: Huang Wei 

Support dynamically enabling or disabling queues.
With a front end such as QEMU, the user can use ethtool to configure queues.
For example, "ethtool -L eth0 combined 3" enables 3 queue pairs.

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 100 ++
 drivers/vdpa/ifc/base/ifcvf.h |   6 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c |  93 ---
 3 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 30bb8cb..869ddd6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -233,6 +233,106 @@
}
 }
 
+int
+ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u8 *lm_cfg;
+   u16 notify_off;
+   int msix_vector;
+
+   if (i >= (int)hw->nr_vring)
+   return -1;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return -1;
+   }
+
+   ifcvf_enable_mq(hw);
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector != (i + 1)) {
+   IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector == IFCVF_MSI_NO_VECTOR) {
+   RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n",
+   i);
+   return -1;
+   }
+   }
+
+   io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+   &cfg->queue_desc_hi);
+   io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+   &cfg->queue_avail_hi);
+   io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+   &cfg->queue_used_hi);
+   IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
+
+   notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
+   hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+   notify_off * hw->notify_off_multiplier);
+   IFCVF_WRITE_REG16(1, &cfg->queue_enable);
+
+   return 0;
+}
+
+void
+ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u32 ring_state;
+   u8 *lm_cfg;
+
+   if (i >= (int)hw->nr_vring)
+   return;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return;
+   }
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   IFCVF_WRITE_REG16(0, &cfg->queue_enable);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   } else {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+   }
+   hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 1e133c0..3726da7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -164,6 +164,12 @@ struct ifcvf_hw {
 ifcvf_get_features(struct ifcvf_hw *hw);
 
 int
+ifcvf_en

[PATCH v7 07/12] vdpa/ifc: change internal function name

2022-10-17 Thread Andy Pei
Change internal function name "find_internal_resource_by_dev"
to "find_internal_resource_by_pci_dev".

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9c49f9c..73d04ed 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -121,7 +121,7 @@ struct rte_vdpa_dev_info {
 }
 
 static struct internal_list *
-find_internal_resource_by_dev(struct rte_pci_device *pdev)
+find_internal_resource_by_pci_dev(struct rte_pci_device *pdev)
 {
int found = 0;
struct internal_list *list;
@@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = {
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
 
-   list = find_internal_resource_by_dev(pci_dev);
+   list = find_internal_resource_by_pci_dev(pci_dev);
if (list == NULL) {
DRV_LOG(ERR, "Invalid device: %s", pci_dev->name);
return -1;
-- 
1.8.3.1



[PATCH v7 08/12] vdpa/ifc: add internal API to get device

2022-10-17 Thread Andy Pei
Add new internal API "find_internal_resource_by_rte_dev"
to get device.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 73d04ed..c16e263 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -144,6 +144,29 @@ struct rte_vdpa_dev_info {
return list;
 }
 
+static struct internal_list *
+find_internal_resource_by_rte_dev(struct rte_device *rte_dev)
+{
+   int found = 0;
+   struct internal_list *list;
+
+   pthread_mutex_lock(&internal_list_lock);
+
+   TAILQ_FOREACH(list, &internal_list, next) {
+   if (rte_dev == &list->internal->pdev->device) {
+   found = 1;
+   break;
+   }
+   }
+
+   pthread_mutex_unlock(&internal_list_lock);
+
+   if (!found)
+   return NULL;
+
+   return list;
+}
+
 static int
 ifcvf_vfio_setup(struct ifcvf_internal *internal)
 {
@@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info {
 {
struct ifcvf_internal *internal;
struct internal_list *list;
+   struct rte_device *rte_dev = vdev->device;
 
-   list = find_internal_resource_by_vdev(vdev);
+   list = find_internal_resource_by_rte_dev(rte_dev);
if (list == NULL) {
-   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   DRV_LOG(ERR, "Invalid rte device: %p", rte_dev);
return -1;
}
 
-- 
1.8.3.1



[PATCH v7 09/12] vdpa/ifc: change some driver logic

2022-10-17 Thread Andy Pei
Insert the internal list element into the internal list before
registering the vdpa device, so that vdpa ops can be called during
vdpa device registration.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c16e263..8dfd493 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = {
}
internal->sw_lm = sw_fallback_lm;
 
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_INSERT_TAIL(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
+
internal->vdev = rte_vdpa_register_device(&pci_dev->device,
dev_info[internal->hw.device_type].ops);
if (internal->vdev == NULL) {
DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_REMOVE(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
goto error;
}
 
-   pthread_mutex_lock(&internal_list_lock);
-   TAILQ_INSERT_TAIL(&internal_list, list, next);
-   pthread_mutex_unlock(&internal_list_lock);
-
rte_atomic32_set(&internal->started, 1);
update_datapath(internal);
 
-- 
1.8.3.1



[PATCH v7 10/12] vhost: add vdpa device type to rte vdpa device

2022-10-17 Thread Andy Pei
Add vdpa_device_type to rte_vdpa_device to store the device type.
Call the vdpa op get_dev_type to fill vdpa_device_type
when registering the vdpa device.

Signed-off-by: Andy Pei 
---
 lib/vhost/socket.c  | 15 +--
 lib/vhost/vdpa.c| 17 +
 lib/vhost/vdpa_driver.h |  2 ++
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 608ae57..f768114 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -627,7 +627,6 @@ struct rte_vdpa_device *
 {
struct vhost_user_socket *vsocket;
struct rte_vdpa_device *vdpa_dev;
-   uint32_t vdpa_type = 0;
int ret = 0;
 
pthread_mutex_lock(&vhost_user.mutex);
@@ -644,19 +643,7 @@ struct rte_vdpa_device *
goto unlock_exit;
}
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(path, ERR,
-   "failed to get vdpa dev type for socket 
file.\n");
-   ret = -1;
-   goto unlock_exit;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
-
-   *type = vdpa_type;
+   *type = vdpa_dev->vdpa_device_type;
 
 unlock_exit:
pthread_mutex_unlock(&vhost_user.mutex);
diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index bb82857..b487f4d 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -73,6 +73,8 @@ struct rte_vdpa_device *
struct rte_vdpa_dev_ops *ops)
 {
struct rte_vdpa_device *dev;
+   uint32_t vdpa_type = -1;
+   int ret = 0;
 
if (ops == NULL)
return NULL;
@@ -101,6 +103,21 @@ struct rte_vdpa_device *
 
dev->device = rte_dev;
dev->ops = ops;
+
+   if (ops->get_dev_type) {
+   ret = ops->get_dev_type(dev, &vdpa_type);
+   if (ret) {
+   VHOST_LOG_CONFIG(rte_dev->name, ERR,
+"Failed to get vdpa dev type.\n");
+   ret = -1;
+   goto out_unlock;
+   }
+   } else {
+   /** by default, we assume vdpa device is a net device */
+   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+   dev->vdpa_device_type = vdpa_type;
+
TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next);
 out_unlock:
rte_spinlock_unlock(&vdpa_device_list_lock);
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8b88a53..c4ec222 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -92,6 +92,8 @@ struct rte_vdpa_device {
struct rte_device *device;
/** vdpa device operations */
struct rte_vdpa_dev_ops *ops;
+   /** vdpa device type: net, blk... */
+   uint32_t vdpa_device_type;
 };
 
 /**
-- 
1.8.3.1



[PATCH v7 11/12] vhost: vDPA blk device gets ready when the first queue is ready

2022-10-17 Thread Andy Pei
When booting from a virtio blk device, SeaBIOS in QEMU only enables one queue.
To work in this scenario, the vDPA BLK device back-end configures the device
when the first queue is ready.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
---
 lib/vhost/vhost_user.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index cd65257..d5dbd9b 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1441,11 +1441,14 @@
 }
 
 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u
+#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u
 
 static int
 virtio_is_ready(struct virtio_net *dev)
 {
+   struct rte_vdpa_device *vdpa_dev;
struct vhost_virtqueue *vq;
+   uint32_t vdpa_type;
uint32_t i, nr_vring = dev->nr_vring;
 
if (dev->flags & VIRTIO_DEV_READY)
@@ -1454,13 +1457,22 @@
if (!dev->nr_vring)
return 0;
 
-   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) {
-   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev)
+   vdpa_type = vdpa_dev->vdpa_device_type;
+   else
+   vdpa_type = -1;
 
-   if (dev->nr_vring < nr_vring)
-   return 0;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY;
+   } else {
+   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET)
+   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
}
 
+   if (dev->nr_vring < nr_vring)
+   return 0;
+
for (i = 0; i < nr_vring; i++) {
vq = dev->virtqueue[i];
 
@@ -2958,7 +2970,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
int ret;
int unlock_required = 0;
bool handled;
-   uint32_t vdpa_type = 0;
+   uint32_t vdpa_type;
uint32_t request;
uint32_t i;
 
@@ -3170,16 +3182,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa 
dev type.\n");
-   ret = -1;
-   goto out;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
+   vdpa_type = vdpa_dev->vdpa_device_type;
if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
&& request != VHOST_USER_SET_VRING_CALL)
goto out;
-- 
1.8.3.1



[PATCH v7 12/12] vhost: improve vDPA blk device configure condition

2022-10-17 Thread Andy Pei
To support multi-queue, configure the device
after the call fds of all queues are set.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 lib/vhost/vhost_user.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index d5dbd9b..96383b9 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2973,6 +2973,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
uint32_t vdpa_type;
uint32_t request;
uint32_t i;
+   uint16_t blk_call_fd;
 
dev = get_device(vid);
if (dev == NULL)
@@ -3183,9 +3184,15 @@ static int is_vring_iotlb(struct virtio_net *dev,
goto out;
 
vdpa_type = vdpa_dev->vdpa_device_type;
-   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
-   && request != VHOST_USER_SET_VRING_CALL)
-   goto out;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   if (request == VHOST_USER_SET_VRING_CALL) {
+   blk_call_fd = ctx.msg.payload.u64 & 
VHOST_USER_VRING_IDX_MASK;
+   if (blk_call_fd != dev->nr_vring - 1)
+   goto out;
+   } else {
+   goto out;
+   }
+   }
 
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
-- 
1.8.3.1



[PATCH v8 00/12] vdpa/ifc: add multi queue support

2022-10-18 Thread Andy Pei
v8:
 change "vdpa_device_type" in "rte_vdpa_device" to "type".

v7:
 Fill vdpa_device_type in vdpa device registration.

v6:
 Add vdpa_device_type to rte_vdpa_device to store vDPA device type.

v5:
 fix some commit message.
 rework some code logic.

v4:
 fix some commit message.
 add some comments to code.
 fix some code to reduce confusion.

v3:
 rename device ID macro name.
 fix some patch title and commit message.
 delete some used macros.
 rework some code logic.

v2:
 fix some coding style issue.
 support dynamic enable/disable queue at run time.

Andy Pei (10):
  vdpa/ifc: add multi-queue support
  vdpa/ifc: set max queues based on virtio spec
  vdpa/ifc: write queue count to MQ register
  vdpa/ifc: only configure enabled queue
  vdpa/ifc: change internal function name
  vdpa/ifc: add internal API to get device
  vdpa/ifc: change some driver logic
  vhost: add type to rte vdpa device
  vhost: vDPA blk device gets ready when the first queue is ready
  vhost: improve vDPA blk device configure condition

Huang Wei (2):
  vdpa/ifc: add new device ID for legacy network device
  vdpa/ifc: support dynamic enable/disable queue

 drivers/vdpa/ifc/base/ifcvf.c | 144 
 drivers/vdpa/ifc/base/ifcvf.h |  16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++---
 lib/vhost/socket.c|  15 +---
 lib/vhost/vdpa.c  |  15 
 lib/vhost/vdpa_driver.h   |   2 +
 lib/vhost/vhost_user.c|  38 +
 7 files changed, 354 insertions(+), 61 deletions(-)

-- 
1.8.3.1



[PATCH v8 01/12] vdpa/ifc: add new device ID for legacy network device

2022-10-18 Thread Andy Pei
From: Huang Wei 

Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000).
Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID".

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  6 --
 drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 9d95aac..ef7697a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -12,12 +12,14 @@
 #define IFCVF_BLK  1
 
 #define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_NET_DEVICE_ID 0x1041
+#define IFCVF_NET_MODERN_DEVICE_ID  0x1041
 #define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000
 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
 #define IFCVF_SUBSYS_VENDOR_ID  0x8086
 #define IFCVF_SUBSYS_DEVICE_ID  0x001A
-#define IFCVF_BLK_DEVICE_ID 0x0002
+#define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
+#define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
 #define IFCVF_MAX_QUEUES   1
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d5ac583..b4389a0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
- .device_id = IFCVF_NET_DEVICE_ID,
+ .device_id = IFCVF_NET_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
+ .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
+ .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
+   },
+
+   { .class_id = RTE_CLASS_ANY_ID,
+ .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .vendor_id = 0, /* sentinel */
-- 
1.8.3.1



[PATCH v8 03/12] vdpa/ifc: set max queues based on virtio spec

2022-10-18 Thread Andy Pei
Set max_queues according to virtio spec.
For virtio BLK device, set max_queues to the value of num_queues
in struct virtio_blk_config.
For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg,
get the queue pair number using num_queues and set max_queues to it.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.h |  2 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index d16d9ab..1e133c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -21,7 +21,7 @@
 #define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
 #define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
-#define IFCVF_MAX_QUEUES   1
+#define IFCVF_MAX_QUEUES   32
 
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 #define VIRTIO_F_IOMMU_PLATFORM33
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 008cf89..5a24204 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -26,6 +26,12 @@
 
 #include "base/ifcvf.h"
 
+/*
+ * RTE_MIN() cannot be used since braced-group within expression allowed
+ * only inside a function.
+ */
+#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2))
+
 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
@@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = {
uint64_t capacity = 0;
uint8_t *byte;
uint32_t i;
+   uint16_t queue_pairs;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = {
}
 
internal->configured = 0;
-   internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
 
device_id = ifcvf_pci_get_device_type(pci_dev);
@@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->hw.device_type = IFCVF_NET;
+   /*
+* ifc device always has CTRL_VQ,
+* and supports VIRTIO_NET_F_CTRL_VQ feature.
+*/
+   queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
+   DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
+   queue_pairs);
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
@@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(DEBUG, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v8 02/12] vdpa/ifc: add multi-queue support

2022-10-18 Thread Andy Pei
Enable VHOST_USER_PROTOCOL_F_MQ feature.
Expose IFCVF_MQ_OFFSET register to enable multi-queue.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 drivers/vdpa/ifc/base/ifcvf.h | 2 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index f1e1474..81c68c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -90,6 +90,15 @@
if (!hw->lm_cfg)
WARNINGOUT("HW support live migration not support!\n");
 
+   /* For some hardware implementation, for example:
+* the BAR 4 of PF is NULL, while BAR 4 of VF is not.
+* This code makes sure hw->mq_cfg is a valid address.
+*/
+   if (hw->mem_resource[4].addr)
+   hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET;
+   else
+   hw->mq_cfg = NULL;
+
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
DEBUGOUT("capability incomplete\n");
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index ef7697a..d16d9ab 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -50,6 +50,7 @@
 
 #define IFCVF_LM_CFG_SIZE  0x40
 #define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_MQ_OFFSET0x28
 
 #define IFCVF_LM_LOGGING_CTRL  0x0
 
@@ -149,6 +150,7 @@ struct ifcvf_hw {
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
u8 *lm_cfg;
+   u8 *mq_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
int device_type;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b4389a0..008cf89 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+1ULL << VHOST_USER_PROTOCOL_F_MQ | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VDPA_BLK_PROTOCOL_FEATURES \
-- 
1.8.3.1



[PATCH v8 04/12] vdpa/ifc: write queue count to MQ register

2022-10-18 Thread Andy Pei
Write queue count to IFCVF_MQ_OFFSET register
to enable multi-queue feature.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 81c68c0..b377126 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -202,6 +202,37 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
+STATIC void
+ifcvf_enable_mq(struct ifcvf_hw *hw)
+{
+   u8 *mq_cfg;
+   u8 qid;
+   int nr_queue = 0;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
+   nr_queue++;
+   }
+
+   if (nr_queue == 0) {
+   WARNINGOUT("no enabled vring\n");
+   return;
+   }
+
+   mq_cfg = hw->mq_cfg;
+   if (mq_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   *(u32 *)mq_cfg = nr_queue;
+   RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue);
+   } else {
+   *(u32 *)mq_cfg = nr_queue / 2;
+   RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n",
+   nr_queue / 2);
+   }
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
@@ -219,6 +250,7 @@
return -1;
}
 
+   ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-- 
1.8.3.1



[PATCH v8 05/12] vdpa/ifc: only configure enabled queue

2022-10-18 Thread Andy Pei
When configuring the hardware queue, we only configure queues which
have been enabled by vhost.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index b377126..30bb8cb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -252,6 +252,9 @@
 
ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a24204..0c3407a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -284,6 +284,8 @@ struct rte_vdpa_dev_info {
rte_vhost_get_negotiated_features(vid, &hw->req_features);
 
for (i = 0; i < nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
rte_vhost_get_vhost_vring(vid, i, &vq);
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
if (gpa == 0) {
@@ -499,6 +501,8 @@ struct rte_vdpa_dev_info {
 
vring.kickfd = -1;
for (qid = 0; qid < q_num; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
@@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info {
struct rte_vdpa_device *vdev;
struct internal_list *list;
struct ifcvf_internal *internal;
+   struct ifcvf_hw *hw;
+   uint16_t i;
 
vdev = rte_vhost_get_vdpa_device(vid);
list = find_internal_resource_by_vdev(vdev);
@@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info {
rte_atomic32_set(&internal->dev_attached, 1);
update_datapath(internal);
 
-   if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
-   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+   hw = &internal->hw;
+   for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+   if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
+   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
vdev->device->name);
+   }
 
internal->configured = 1;
+   DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
return 0;
 }
 
-- 
1.8.3.1



[PATCH v8 07/12] vdpa/ifc: change internal function name

2022-10-18 Thread Andy Pei
Change internal function name "find_internal_resource_by_dev"
to "find_internal_resource_by_pci_dev".

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9c49f9c..73d04ed 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -121,7 +121,7 @@ struct rte_vdpa_dev_info {
 }
 
 static struct internal_list *
-find_internal_resource_by_dev(struct rte_pci_device *pdev)
+find_internal_resource_by_pci_dev(struct rte_pci_device *pdev)
 {
int found = 0;
struct internal_list *list;
@@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = {
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
 
-   list = find_internal_resource_by_dev(pci_dev);
+   list = find_internal_resource_by_pci_dev(pci_dev);
if (list == NULL) {
DRV_LOG(ERR, "Invalid device: %s", pci_dev->name);
return -1;
-- 
1.8.3.1



[PATCH v8 06/12] vdpa/ifc: support dynamic enable/disable queue

2022-10-18 Thread Andy Pei
From: Huang Wei 

Support dynamically enabling or disabling queues.
With a front end like QEMU, the user can use ethtool to configure queues.
For example, "ethtool -L eth0 combined 3" enables 3 queue pairs.

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 100 ++
 drivers/vdpa/ifc/base/ifcvf.h |   6 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c |  93 ---
 3 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 30bb8cb..869ddd6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -233,6 +233,106 @@
}
 }
 
+int
+ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u8 *lm_cfg;
+   u16 notify_off;
+   int msix_vector;
+
+   if (i >= (int)hw->nr_vring)
+   return -1;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return -1;
+   }
+
+   ifcvf_enable_mq(hw);
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector != (i + 1)) {
+   IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector == IFCVF_MSI_NO_VECTOR) {
+   RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n",
+   i);
+   return -1;
+   }
+   }
+
+   io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+   &cfg->queue_desc_hi);
+   io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+   &cfg->queue_avail_hi);
+   io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+   &cfg->queue_used_hi);
+   IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
+
+   notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
+   hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+   notify_off * hw->notify_off_multiplier);
+   IFCVF_WRITE_REG16(1, &cfg->queue_enable);
+
+   return 0;
+}
+
+void
+ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u32 ring_state;
+   u8 *lm_cfg;
+
+   if (i >= (int)hw->nr_vring)
+   return;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return;
+   }
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   IFCVF_WRITE_REG16(0, &cfg->queue_enable);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   } else {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+   }
+   hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 1e133c0..3726da7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -164,6 +164,12 @@ struct ifcvf_hw {
 ifcvf_get_features(struct ifcvf_hw *hw);
 
 int
+ifcvf_en

[PATCH v8 08/12] vdpa/ifc: add internal API to get device

2022-10-18 Thread Andy Pei
Add new internal API "find_internal_resource_by_rte_dev"
to get device.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 73d04ed..c16e263 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -144,6 +144,29 @@ struct rte_vdpa_dev_info {
return list;
 }
 
+static struct internal_list *
+find_internal_resource_by_rte_dev(struct rte_device *rte_dev)
+{
+   int found = 0;
+   struct internal_list *list;
+
+   pthread_mutex_lock(&internal_list_lock);
+
+   TAILQ_FOREACH(list, &internal_list, next) {
+   if (rte_dev == &list->internal->pdev->device) {
+   found = 1;
+   break;
+   }
+   }
+
+   pthread_mutex_unlock(&internal_list_lock);
+
+   if (!found)
+   return NULL;
+
+   return list;
+}
+
 static int
 ifcvf_vfio_setup(struct ifcvf_internal *internal)
 {
@@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info {
 {
struct ifcvf_internal *internal;
struct internal_list *list;
+   struct rte_device *rte_dev = vdev->device;
 
-   list = find_internal_resource_by_vdev(vdev);
+   list = find_internal_resource_by_rte_dev(rte_dev);
if (list == NULL) {
-   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   DRV_LOG(ERR, "Invalid rte device: %p", rte_dev);
return -1;
}
 
-- 
1.8.3.1



[PATCH v8 09/12] vdpa/ifc: change some driver logic

2022-10-18 Thread Andy Pei
Insert the internal list element into the internal list before
registering the vdpa device, so that vdpa ops can be called during
vdpa device registration.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c16e263..8dfd493 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = {
}
internal->sw_lm = sw_fallback_lm;
 
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_INSERT_TAIL(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
+
internal->vdev = rte_vdpa_register_device(&pci_dev->device,
dev_info[internal->hw.device_type].ops);
if (internal->vdev == NULL) {
DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_REMOVE(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
goto error;
}
 
-   pthread_mutex_lock(&internal_list_lock);
-   TAILQ_INSERT_TAIL(&internal_list, list, next);
-   pthread_mutex_unlock(&internal_list_lock);
-
rte_atomic32_set(&internal->started, 1);
update_datapath(internal);
 
-- 
1.8.3.1



[PATCH v8 10/12] vhost: add type to rte vdpa device

2022-10-18 Thread Andy Pei
Add type to rte_vdpa_device to store the device type.
Call the vdpa op get_dev_type to fill in type when registering
the vdpa device.

Signed-off-by: Andy Pei 
---
 lib/vhost/socket.c  | 15 +--
 lib/vhost/vdpa.c| 15 +++
 lib/vhost/vdpa_driver.h |  2 ++
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 608ae57..863a6f6 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -627,7 +627,6 @@ struct rte_vdpa_device *
 {
struct vhost_user_socket *vsocket;
struct rte_vdpa_device *vdpa_dev;
-   uint32_t vdpa_type = 0;
int ret = 0;
 
pthread_mutex_lock(&vhost_user.mutex);
@@ -644,19 +643,7 @@ struct rte_vdpa_device *
goto unlock_exit;
}
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(path, ERR,
-   "failed to get vdpa dev type for socket 
file.\n");
-   ret = -1;
-   goto unlock_exit;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
-
-   *type = vdpa_type;
+   *type = vdpa_dev->type;
 
 unlock_exit:
pthread_mutex_unlock(&vhost_user.mutex);
diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index bb82857..577cb00 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -73,6 +73,7 @@ struct rte_vdpa_device *
struct rte_vdpa_dev_ops *ops)
 {
struct rte_vdpa_device *dev;
+   int ret = 0;
 
if (ops == NULL)
return NULL;
@@ -101,6 +102,20 @@ struct rte_vdpa_device *
 
dev->device = rte_dev;
dev->ops = ops;
+
+   if (ops->get_dev_type) {
+   ret = ops->get_dev_type(dev, &dev->type);
+   if (ret) {
+   VHOST_LOG_CONFIG(rte_dev->name, ERR,
+"Failed to get vdpa dev type.\n");
+   ret = -1;
+   goto out_unlock;
+   }
+   } else {
+   /** by default, we assume vdpa device is a net device */
+   dev->type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+
TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next);
 out_unlock:
rte_spinlock_unlock(&vdpa_device_list_lock);
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8b88a53..8db4ab9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -92,6 +92,8 @@ struct rte_vdpa_device {
struct rte_device *device;
/** vdpa device operations */
struct rte_vdpa_dev_ops *ops;
+   /** vdpa device type: net, blk... */
+   uint32_t type;
 };
 
 /**
-- 
1.8.3.1



[PATCH v8 11/12] vhost: vDPA blk device gets ready when the first queue is ready

2022-10-18 Thread Andy Pei
When booting from a virtio blk device, SeaBIOS in QEMU only enables one queue.
To work in this scenario, the vDPA BLK device back-end configures the device
when the first queue is ready.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
---
 lib/vhost/vhost_user.c | 33 +
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index cd65257..e0ff79d 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1441,11 +1441,14 @@
 }
 
 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u
+#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u
 
 static int
 virtio_is_ready(struct virtio_net *dev)
 {
+   struct rte_vdpa_device *vdpa_dev;
struct vhost_virtqueue *vq;
+   uint32_t vdpa_type;
uint32_t i, nr_vring = dev->nr_vring;
 
if (dev->flags & VIRTIO_DEV_READY)
@@ -1454,13 +1457,22 @@
if (!dev->nr_vring)
return 0;
 
-   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) {
-   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev)
+   vdpa_type = vdpa_dev->type;
+   else
+   vdpa_type = -1;
 
-   if (dev->nr_vring < nr_vring)
-   return 0;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY;
+   } else {
+   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET)
+   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
}
 
+   if (dev->nr_vring < nr_vring)
+   return 0;
+
for (i = 0; i < nr_vring; i++) {
vq = dev->virtqueue[i];
 
@@ -2958,7 +2970,6 @@ static int is_vring_iotlb(struct virtio_net *dev,
int ret;
int unlock_required = 0;
bool handled;
-   uint32_t vdpa_type = 0;
uint32_t request;
uint32_t i;
 
@@ -3170,17 +3181,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa 
dev type.\n");
-   ret = -1;
-   goto out;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
-   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
+   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
&& request != VHOST_USER_SET_VRING_CALL)
goto out;
 
-- 
1.8.3.1



[PATCH v8 12/12] vhost: improve vDPA blk device configure condition

2022-10-18 Thread Andy Pei
To support multi-queue, configure the device
after the call fds of all queues are set.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
---
 lib/vhost/vhost_user.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index e0ff79d..9902ae9 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2972,6 +2972,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
bool handled;
uint32_t request;
uint32_t i;
+   uint16_t blk_call_fd;
 
dev = get_device(vid);
if (dev == NULL)
@@ -3181,9 +3182,15 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
-   && request != VHOST_USER_SET_VRING_CALL)
-   goto out;
+   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   if (request == VHOST_USER_SET_VRING_CALL) {
+   blk_call_fd = ctx.msg.payload.u64 & 
VHOST_USER_VRING_IDX_MASK;
+   if (blk_call_fd != dev->nr_vring - 1)
+   goto out;
+   } else {
+   goto out;
+   }
+   }
 
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
-- 
1.8.3.1



[PATCH v9 01/12] vdpa/ifc: add new device ID for legacy network device

2022-10-19 Thread Andy Pei
From: Huang Wei 

Add new device id to support IFCVF_NET_TRANSITIONAL_DEVICE_ID (0x1000).
Rename macro from "IFCVF_BLK_DEVICE_ID" to "IFCVF_SUBSYS_BLK_DEVICE_ID".

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/base/ifcvf.h |  6 --
 drivers/vdpa/ifc/ifcvf_vdpa.c | 13 ++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 9d95aac..ef7697a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -12,12 +12,14 @@
 #define IFCVF_BLK  1
 
 #define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_NET_DEVICE_ID 0x1041
+#define IFCVF_NET_MODERN_DEVICE_ID  0x1041
 #define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_NET_TRANSITIONAL_DEVICE_ID0x1000
 #define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
 #define IFCVF_SUBSYS_VENDOR_ID  0x8086
 #define IFCVF_SUBSYS_DEVICE_ID  0x001A
-#define IFCVF_BLK_DEVICE_ID 0x0002
+#define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
+#define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
 #define IFCVF_MAX_QUEUES   1
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d5ac583..b4389a0 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1684,23 +1684,30 @@ struct rte_vdpa_dev_info dev_info[] = {
 static const struct rte_pci_id pci_id_ifcvf_map[] = {
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
- .device_id = IFCVF_NET_DEVICE_ID,
+ .device_id = IFCVF_NET_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
+ .device_id = IFCVF_NET_TRANSITIONAL_DEVICE_ID,
+ .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_NET_DEVICE_ID,
+   },
+
+   { .class_id = RTE_CLASS_ANY_ID,
+ .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_TRANSITIONAL_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .class_id = RTE_CLASS_ANY_ID,
  .vendor_id = IFCVF_VENDOR_ID,
  .device_id = IFCVF_BLK_MODERN_DEVICE_ID,
  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
- .subsystem_device_id = IFCVF_BLK_DEVICE_ID,
+ .subsystem_device_id = IFCVF_SUBSYS_BLK_DEVICE_ID,
},
 
{ .vendor_id = 0, /* sentinel */
-- 
1.8.3.1



[PATCH v9 00/12] vdpa/ifc: add multi queue support

2022-10-19 Thread Andy Pei
v9:
 fix some commit message.

v8:
 change "vdpa_device_type" in "rte_vdpa_device" to "type".

v7:
 Fill vdpa_device_type in vdpa device registration.

v6:
 Add vdpa_device_type to rte_vdpa_device to store vDPA device type.

v5:
 fix some commit message.
 rework some code logic.

v4:
 fix some commit message.
 add some comments to code.
 fix some code to reduce confusion.

v3:
 rename device ID macro name.
 fix some patch title and commit message.
 delete some used macro.
 rework some code logic.

v2:
 fix some coding style issue.
 support dynamic enable/disable queue at run time.

Andy Pei (10):
  vdpa/ifc: add multi-queue support
  vdpa/ifc: set max queues based on virtio spec
  vdpa/ifc: write queue count to MQ register
  vdpa/ifc: only configure enabled queue
  vdpa/ifc: change internal function name
  vdpa/ifc: add internal API to get device
  vdpa/ifc: improve internal list logic
  vhost: add type to rte vdpa device
  vhost: vDPA blk device gets ready when the first queue is ready
  vhost: improve vDPA blk device configure condition

Huang Wei (2):
  vdpa/ifc: add new device ID for legacy network device
  vdpa/ifc: support dynamic enable/disable queue

 drivers/vdpa/ifc/base/ifcvf.c | 144 
 drivers/vdpa/ifc/base/ifcvf.h |  16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 185 +++---
 lib/vhost/socket.c|  15 +---
 lib/vhost/vdpa.c  |  15 
 lib/vhost/vdpa_driver.h   |   2 +
 lib/vhost/vhost_user.c|  38 +
 7 files changed, 354 insertions(+), 61 deletions(-)

-- 
1.8.3.1



[PATCH v9 02/12] vdpa/ifc: add multi-queue support

2022-10-19 Thread Andy Pei
Enable VHOST_USER_PROTOCOL_F_MQ feature.
Expose IFCVF_MQ_OFFSET register to enable multi-queue.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 drivers/vdpa/ifc/base/ifcvf.h | 2 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 1 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index f1e1474..81c68c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -90,6 +90,15 @@
if (!hw->lm_cfg)
WARNINGOUT("HW support live migration not support!\n");
 
+   /* For some hardware implementation, for example:
+* the BAR 4 of PF is NULL, while BAR 4 of VF is not.
+* This code makes sure hw->mq_cfg is a valid address.
+*/
+   if (hw->mem_resource[4].addr)
+   hw->mq_cfg = hw->mem_resource[4].addr + IFCVF_MQ_OFFSET;
+   else
+   hw->mq_cfg = NULL;
+
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
DEBUGOUT("capability incomplete\n");
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index ef7697a..d16d9ab 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -50,6 +50,7 @@
 
 #define IFCVF_LM_CFG_SIZE  0x40
 #define IFCVF_LM_RING_STATE_OFFSET 0x20
+#define IFCVF_MQ_OFFSET0x28
 
 #define IFCVF_LM_LOGGING_CTRL  0x0
 
@@ -149,6 +150,7 @@ struct ifcvf_hw {
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
u8 *lm_cfg;
+   u8 *mq_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
int device_type;
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index b4389a0..008cf89 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1248,6 +1248,7 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | \
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
+1ULL << VHOST_USER_PROTOCOL_F_MQ | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
 
 #define VDPA_BLK_PROTOCOL_FEATURES \
-- 
1.8.3.1



[PATCH v9 03/12] vdpa/ifc: set max queues based on virtio spec

2022-10-19 Thread Andy Pei
Set max_queues according to virtio spec.
For virtio BLK device, set max_queues to the value of num_queues
in struct virtio_blk_config.
For virtio NET device, read num_queues from struct ifcvf_pci_common_cfg,
get the queue pair number using num_queues and set max_queues to it.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.h |  2 +-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 19 ++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index d16d9ab..1e133c0 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -21,7 +21,7 @@
 #define IFCVF_SUBSYS_NET_DEVICE_ID  0x0001
 #define IFCVF_SUBSYS_BLK_DEVICE_ID  0x0002
 
-#define IFCVF_MAX_QUEUES   1
+#define IFCVF_MAX_QUEUES   32
 
 #ifndef VIRTIO_F_IOMMU_PLATFORM
 #define VIRTIO_F_IOMMU_PLATFORM33
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 008cf89..5a24204 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -26,6 +26,12 @@
 
 #include "base/ifcvf.h"
 
+/*
+ * RTE_MIN() cannot be used since braced-group within expression allowed
+ * only inside a function.
+ */
+#define MIN(v1, v2)((v1) < (v2) ? (v1) : (v2))
+
 RTE_LOG_REGISTER(ifcvf_vdpa_logtype, pmd.vdpa.ifcvf, NOTICE);
 #define DRV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
@@ -1512,6 +1518,7 @@ struct rte_vdpa_dev_info dev_info[] = {
uint64_t capacity = 0;
uint8_t *byte;
uint32_t i;
+   uint16_t queue_pairs;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1559,7 +1566,6 @@ struct rte_vdpa_dev_info dev_info[] = {
}
 
internal->configured = 0;
-   internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
 
device_id = ifcvf_pci_get_device_type(pci_dev);
@@ -1570,6 +1576,14 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->hw.device_type = IFCVF_NET;
+   /*
+* ifc device always has CTRL_VQ,
+* and supports VIRTIO_NET_F_CTRL_VQ feature.
+*/
+   queue_pairs = (internal->hw.common_cfg->num_queues - 1) / 2;
+   DRV_LOG(INFO, "%s support %u queue pairs", pci_dev->name,
+   queue_pairs);
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES, queue_pairs);
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
@@ -1609,6 +1623,9 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(DEBUG, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   internal->max_queues = MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v9 06/12] vdpa/ifc: support dynamic enable/disable queue

2022-10-19 Thread Andy Pei
From: Huang Wei 

Support dynamically enabling or disabling queues.
For a front end such as QEMU, the user can use ethtool to configure queues.
For example, "ethtool -L eth0 combined 3" enables 3 queue pairs.

Signed-off-by: Huang Wei 
Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 100 ++
 drivers/vdpa/ifc/base/ifcvf.h |   6 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c |  93 ---
 3 files changed, 184 insertions(+), 15 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 30bb8cb..869ddd6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -233,6 +233,106 @@
}
 }
 
+int
+ifcvf_enable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u8 *lm_cfg;
+   u16 notify_off;
+   int msix_vector;
+
+   if (i >= (int)hw->nr_vring)
+   return -1;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return -1;
+   }
+
+   ifcvf_enable_mq(hw);
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector != (i + 1)) {
+   IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
+   msix_vector = IFCVF_READ_REG16(&cfg->queue_msix_vector);
+   if (msix_vector == IFCVF_MSI_NO_VECTOR) {
+   RTE_LOG(ERR, PMD, "queue %d, msix vec alloc failed\n",
+   i);
+   return -1;
+   }
+   }
+
+   io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
+   &cfg->queue_desc_hi);
+   io_write64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo,
+   &cfg->queue_avail_hi);
+   io_write64_twopart(hw->vring[i].used, &cfg->queue_used_lo,
+   &cfg->queue_used_hi);
+   IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
+
+   notify_off = IFCVF_READ_REG16(&cfg->queue_notify_off);
+   hw->notify_addr[i] = (void *)((u8 *)hw->notify_base +
+   notify_off * hw->notify_off_multiplier);
+   IFCVF_WRITE_REG16(1, &cfg->queue_enable);
+
+   return 0;
+}
+
+void
+ifcvf_disable_vring_hw(struct ifcvf_hw *hw, int i)
+{
+   struct ifcvf_pci_common_cfg *cfg;
+   u32 ring_state;
+   u8 *lm_cfg;
+
+   if (i >= (int)hw->nr_vring)
+   return;
+
+   cfg = hw->common_cfg;
+   if (!cfg) {
+   RTE_LOG(ERR, PMD, "common_cfg in HW is NULL.\n");
+   return;
+   }
+
+   IFCVF_WRITE_REG16(i, &cfg->queue_select);
+   IFCVF_WRITE_REG16(0, &cfg->queue_enable);
+
+   lm_cfg = hw->lm_cfg;
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   } else {
+   ring_state = *(u32 *)(lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+   }
+   hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 1e133c0..3726da7 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -164,6 +164,12 @@ struct ifcvf_hw {
 ifcvf_get_features(struct ifcvf_hw *hw);
 
 int
+ifcvf_en

[PATCH v9 04/12] vdpa/ifc: write queue count to MQ register

2022-10-19 Thread Andy Pei
Write queue count to IFCVF_MQ_OFFSET register
to enable multi-queue feature.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 81c68c0..b377126 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -202,6 +202,37 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
+STATIC void
+ifcvf_enable_mq(struct ifcvf_hw *hw)
+{
+   u8 *mq_cfg;
+   u8 qid;
+   int nr_queue = 0;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
+   nr_queue++;
+   }
+
+   if (nr_queue == 0) {
+   WARNINGOUT("no enabled vring\n");
+   return;
+   }
+
+   mq_cfg = hw->mq_cfg;
+   if (mq_cfg) {
+   if (hw->device_type == IFCVF_BLK) {
+   *(u32 *)mq_cfg = nr_queue;
+   RTE_LOG(INFO, PMD, "%d queues are enabled\n", nr_queue);
+   } else {
+   *(u32 *)mq_cfg = nr_queue / 2;
+   RTE_LOG(INFO, PMD, "%d queue pairs are enabled\n",
+   nr_queue / 2);
+   }
+   }
+}
+
 STATIC int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
@@ -219,6 +250,7 @@
return -1;
}
 
+   ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
-- 
1.8.3.1



[PATCH v9 05/12] vdpa/ifc: only configure enabled queue

2022-10-19 Thread Andy Pei
When configuring the hardware queue, we only configure queues which
have been enabled by vhost.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
---
 drivers/vdpa/ifc/base/ifcvf.c |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 16 ++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index b377126..30bb8cb 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -252,6 +252,9 @@
 
ifcvf_enable_mq(hw);
for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+
IFCVF_WRITE_REG16(i, &cfg->queue_select);
io_write64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo,
&cfg->queue_desc_hi);
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 5a24204..0c3407a 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -284,6 +284,8 @@ struct rte_vdpa_dev_info {
rte_vhost_get_negotiated_features(vid, &hw->req_features);
 
for (i = 0; i < nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
rte_vhost_get_vhost_vring(vid, i, &vq);
gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
if (gpa == 0) {
@@ -499,6 +501,8 @@ struct rte_vdpa_dev_info {
 
vring.kickfd = -1;
for (qid = 0; qid < q_num; qid++) {
+   if (!hw->vring[qid].enable)
+   continue;
ev.events = EPOLLIN | EPOLLPRI;
rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
@@ -1058,6 +1062,8 @@ struct rte_vdpa_dev_info {
struct rte_vdpa_device *vdev;
struct internal_list *list;
struct ifcvf_internal *internal;
+   struct ifcvf_hw *hw;
+   uint16_t i;
 
vdev = rte_vhost_get_vdpa_device(vid);
list = find_internal_resource_by_vdev(vdev);
@@ -1071,11 +1077,17 @@ struct rte_vdpa_dev_info {
rte_atomic32_set(&internal->dev_attached, 1);
update_datapath(internal);
 
-   if (rte_vhost_host_notifier_ctrl(vid, RTE_VHOST_QUEUE_ALL, true) != 0)
-   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
+   hw = &internal->hw;
+   for (i = 0; i < hw->nr_vring; i++) {
+   if (!hw->vring[i].enable)
+   continue;
+   if (rte_vhost_host_notifier_ctrl(vid, i, true) != 0)
+   DRV_LOG(NOTICE, "vDPA (%s): software relay is used.",
vdev->device->name);
+   }
 
internal->configured = 1;
+   DRV_LOG(INFO, "vDPA device %s is configured", vdev->device->name);
return 0;
 }
 
-- 
1.8.3.1



[PATCH v9 07/12] vdpa/ifc: change internal function name

2022-10-19 Thread Andy Pei
Change internal function name "find_internal_resource_by_dev"
to "find_internal_resource_by_pci_dev".

Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9c49f9c..73d04ed 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -121,7 +121,7 @@ struct rte_vdpa_dev_info {
 }
 
 static struct internal_list *
-find_internal_resource_by_dev(struct rte_pci_device *pdev)
+find_internal_resource_by_pci_dev(struct rte_pci_device *pdev)
 {
int found = 0;
struct internal_list *list;
@@ -1746,7 +1746,7 @@ struct rte_vdpa_dev_info dev_info[] = {
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
 
-   list = find_internal_resource_by_dev(pci_dev);
+   list = find_internal_resource_by_pci_dev(pci_dev);
if (list == NULL) {
DRV_LOG(ERR, "Invalid device: %s", pci_dev->name);
return -1;
-- 
1.8.3.1



[PATCH v9 08/12] vdpa/ifc: add internal API to get device

2022-10-19 Thread Andy Pei
Add new internal API "find_internal_resource_by_rte_dev"
to get device.

Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 ++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 73d04ed..c16e263 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -144,6 +144,29 @@ struct rte_vdpa_dev_info {
return list;
 }
 
+static struct internal_list *
+find_internal_resource_by_rte_dev(struct rte_device *rte_dev)
+{
+   int found = 0;
+   struct internal_list *list;
+
+   pthread_mutex_lock(&internal_list_lock);
+
+   TAILQ_FOREACH(list, &internal_list, next) {
+   if (rte_dev == &list->internal->pdev->device) {
+   found = 1;
+   break;
+   }
+   }
+
+   pthread_mutex_unlock(&internal_list_lock);
+
+   if (!found)
+   return NULL;
+
+   return list;
+}
+
 static int
 ifcvf_vfio_setup(struct ifcvf_internal *internal)
 {
@@ -1398,10 +1421,11 @@ struct rte_vdpa_dev_info {
 {
struct ifcvf_internal *internal;
struct internal_list *list;
+   struct rte_device *rte_dev = vdev->device;
 
-   list = find_internal_resource_by_vdev(vdev);
+   list = find_internal_resource_by_rte_dev(rte_dev);
if (list == NULL) {
-   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   DRV_LOG(ERR, "Invalid rte device: %p", rte_dev);
return -1;
}
 
-- 
1.8.3.1



[PATCH v9 09/12] vdpa/ifc: improve internal list logic

2022-10-19 Thread Andy Pei
Insert the internal list element into the internal list before
registering the vdpa device, so that vdpa ops can be called during
vdpa device registration.

Signed-off-by: Andy Pei 
Reviewed-by: Maxime Coquelin 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index c16e263..8dfd493 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1737,17 +1737,20 @@ struct rte_vdpa_dev_info dev_info[] = {
}
internal->sw_lm = sw_fallback_lm;
 
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_INSERT_TAIL(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
+
internal->vdev = rte_vdpa_register_device(&pci_dev->device,
dev_info[internal->hw.device_type].ops);
if (internal->vdev == NULL) {
DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
+   pthread_mutex_lock(&internal_list_lock);
+   TAILQ_REMOVE(&internal_list, list, next);
+   pthread_mutex_unlock(&internal_list_lock);
goto error;
}
 
-   pthread_mutex_lock(&internal_list_lock);
-   TAILQ_INSERT_TAIL(&internal_list, list, next);
-   pthread_mutex_unlock(&internal_list_lock);
-
rte_atomic32_set(&internal->started, 1);
update_datapath(internal);
 
-- 
1.8.3.1



[PATCH v9 10/12] vhost: add type to rte vdpa device

2022-10-19 Thread Andy Pei
Add type to rte_vdpa_device to store the device type.
Call the vdpa op get_dev_type to fill in type when registering
the vdpa device.

Signed-off-by: Andy Pei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/socket.c  | 15 +--
 lib/vhost/vdpa.c| 15 +++
 lib/vhost/vdpa_driver.h |  2 ++
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index 608ae57..863a6f6 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -627,7 +627,6 @@ struct rte_vdpa_device *
 {
struct vhost_user_socket *vsocket;
struct rte_vdpa_device *vdpa_dev;
-   uint32_t vdpa_type = 0;
int ret = 0;
 
pthread_mutex_lock(&vhost_user.mutex);
@@ -644,19 +643,7 @@ struct rte_vdpa_device *
goto unlock_exit;
}
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(path, ERR,
-   "failed to get vdpa dev type for socket 
file.\n");
-   ret = -1;
-   goto unlock_exit;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
-
-   *type = vdpa_type;
+   *type = vdpa_dev->type;
 
 unlock_exit:
pthread_mutex_unlock(&vhost_user.mutex);
diff --git a/lib/vhost/vdpa.c b/lib/vhost/vdpa.c
index bb82857..577cb00 100644
--- a/lib/vhost/vdpa.c
+++ b/lib/vhost/vdpa.c
@@ -73,6 +73,7 @@ struct rte_vdpa_device *
struct rte_vdpa_dev_ops *ops)
 {
struct rte_vdpa_device *dev;
+   int ret = 0;
 
if (ops == NULL)
return NULL;
@@ -101,6 +102,20 @@ struct rte_vdpa_device *
 
dev->device = rte_dev;
dev->ops = ops;
+
+   if (ops->get_dev_type) {
+   ret = ops->get_dev_type(dev, &dev->type);
+   if (ret) {
+   VHOST_LOG_CONFIG(rte_dev->name, ERR,
+"Failed to get vdpa dev type.\n");
+   ret = -1;
+   goto out_unlock;
+   }
+   } else {
+   /** by default, we assume vdpa device is a net device */
+   dev->type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+
TAILQ_INSERT_TAIL(&vdpa_device_list, dev, next);
 out_unlock:
rte_spinlock_unlock(&vdpa_device_list_lock);
diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index 8b88a53..8db4ab9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -92,6 +92,8 @@ struct rte_vdpa_device {
struct rte_device *device;
/** vdpa device operations */
struct rte_vdpa_dev_ops *ops;
+   /** vdpa device type: net, blk... */
+   uint32_t type;
 };
 
 /**
-- 
1.8.3.1



[PATCH v9 11/12] vhost: vDPA blk device gets ready when the first queue is ready

2022-10-19 Thread Andy Pei
When booting from a virtio blk device, seabios in QEMU only enables one queue.
To work in this scenario, the vDPA BLK device back-end configures the device
when the first queue is ready.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vhost_user.c | 33 +
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index cd65257..e0ff79d 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -1441,11 +1441,14 @@
 }
 
 #define VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY 2u
+#define VIRTIO_BLK_NUM_VQS_TO_BE_READY 1u
 
 static int
 virtio_is_ready(struct virtio_net *dev)
 {
+   struct rte_vdpa_device *vdpa_dev;
struct vhost_virtqueue *vq;
+   uint32_t vdpa_type;
uint32_t i, nr_vring = dev->nr_vring;
 
if (dev->flags & VIRTIO_DEV_READY)
@@ -1454,13 +1457,22 @@
if (!dev->nr_vring)
return 0;
 
-   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) {
-   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
+   vdpa_dev = dev->vdpa_dev;
+   if (vdpa_dev)
+   vdpa_type = vdpa_dev->type;
+   else
+   vdpa_type = -1;
 
-   if (dev->nr_vring < nr_vring)
-   return 0;
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   nr_vring = VIRTIO_BLK_NUM_VQS_TO_BE_READY;
+   } else {
+   if (dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET)
+   nr_vring = VIRTIO_BUILTIN_NUM_VQS_TO_BE_READY;
}
 
+   if (dev->nr_vring < nr_vring)
+   return 0;
+
for (i = 0; i < nr_vring; i++) {
vq = dev->virtqueue[i];
 
@@ -2958,7 +2970,6 @@ static int is_vring_iotlb(struct virtio_net *dev,
int ret;
int unlock_required = 0;
bool handled;
-   uint32_t vdpa_type = 0;
uint32_t request;
uint32_t i;
 
@@ -3170,17 +3181,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->ops->get_dev_type) {
-   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
-   if (ret) {
-   VHOST_LOG_CONFIG(dev->ifname, ERR, "failed to get vdpa dev type.\n");
-   ret = -1;
-   goto out;
-   }
-   } else {
-   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
-   }
-   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
+   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
&& request != VHOST_USER_SET_VRING_CALL)
goto out;
 
-- 
1.8.3.1



[PATCH v9 12/12] vhost: improve vDPA blk device configure condition

2022-10-19 Thread Andy Pei
To support multi-queue, configure the device
after the call fds of all queues are set.

Signed-off-by: Andy Pei 
Signed-off-by: Huang Wei 
Reviewed-by: Chenbo Xia 
Reviewed-by: Maxime Coquelin 
---
 lib/vhost/vhost_user.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index e0ff79d..9902ae9 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2972,6 +2972,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
bool handled;
uint32_t request;
uint32_t i;
+   uint16_t blk_call_fd;
 
dev = get_device(vid);
if (dev == NULL)
@@ -3181,9 +3182,15 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
-   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
-   && request != VHOST_USER_SET_VRING_CALL)
-   goto out;
+   if (vdpa_dev->type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK) {
+   if (request == VHOST_USER_SET_VRING_CALL) {
+   blk_call_fd = ctx.msg.payload.u64 & VHOST_USER_VRING_IDX_MASK;
+   if (blk_call_fd != dev->nr_vring - 1)
+   goto out;
+   } else {
+   goto out;
+   }
+   }
 
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
-- 
1.8.3.1



[PATCH] vdpa/ifc: fix null pointer dereference

2022-06-08 Thread Andy Pei
Fix null pointer dereference reported in coverity scan.

Coverity issue: 378882
Fixes: 8162a4a9 ("vdpa/ifc/base: access correct register for blk device")
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index dd475a7..0a9f71a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -255,6 +255,10 @@
u32 ring_state;
 
cfg = hw->common_cfg;
+   if (!cfg) {
+   DEBUGOUT("common_cfg in HW is NULL.\n");
+   return;
+   }
 
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config);
for (i = 0; i < hw->nr_vring; i++) {
@@ -262,6 +266,11 @@
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
 
+   if (!hw->lm_cfg) {
+   DEBUGOUT("live migration cfg in HW is NULL.\n");
+   continue;
+   }
+
if (hw->device_type == IFCVF_BLK)
ring_state = *(u32 *)(hw->lm_cfg +
IFCVF_LM_RING_STATE_OFFSET +
-- 
1.8.3.1



[PATCH v2] vdpa/ifc: fix null pointer dereference

2022-06-15 Thread Andy Pei
Fix null pointer dereference reported in coverity scan.

Coverity issue: 378882
Fixes: 5d75517beffe ("vdpa/ifc/base: access correct register for blk device")

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index dd475a7..0a9f71a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -255,6 +255,10 @@
u32 ring_state;
 
cfg = hw->common_cfg;
+   if (!cfg) {
+   DEBUGOUT("common_cfg in HW is NULL.\n");
+   return;
+   }
 
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config);
for (i = 0; i < hw->nr_vring; i++) {
@@ -262,6 +266,11 @@
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
 
+   if (!hw->lm_cfg) {
+   DEBUGOUT("live migration cfg in HW is NULL.\n");
+   continue;
+   }
+
if (hw->device_type == IFCVF_BLK)
ring_state = *(u32 *)(hw->lm_cfg +
IFCVF_LM_RING_STATE_OFFSET +
-- 
1.8.3.1



[PATCH v2] vdpa/ifc: fix null pointer dereference

2022-06-15 Thread Andy Pei
Fix null pointer dereference reported in coverity scan.

Coverity issue: 378882
Fixes: 5d75517beffe ("vdpa/ifc/base: access correct register for blk device")

Signed-off-by: Andy Pei 
Acked-by: Xiao Wang 
---
 drivers/vdpa/ifc/base/ifcvf.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index dd475a7..0a9f71a 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -255,6 +255,10 @@
u32 ring_state;
 
cfg = hw->common_cfg;
+   if (!cfg) {
+   DEBUGOUT("common_cfg in HW is NULL.\n");
+   return;
+   }
 
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->msix_config);
for (i = 0; i < hw->nr_vring; i++) {
@@ -262,6 +266,11 @@
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
 
+   if (!hw->lm_cfg) {
+   DEBUGOUT("live migration cfg in HW is NULL.\n");
+   continue;
+   }
+
if (hw->device_type == IFCVF_BLK)
ring_state = *(u32 *)(hw->lm_cfg +
IFCVF_LM_RING_STATE_OFFSET +
-- 
1.8.3.1



[PATCH] vdpa/ifc: fix vhost message size check issue

2022-06-21 Thread Andy Pei
For vhost message VHOST_USER_GET_CONFIG, the payload size is not
checked in the vhost lib; it is checked in the driver-specific ops.
For the ifc vdpa driver, we just need to make sure the payload size
is not smaller than sizeof(struct virtio_blk_config).

Fixes: 856d03bcdc54 ("vdpa/ifc: add block operations")

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 8bc971c..ac42de9 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1400,7 +1400,7 @@ struct rte_vdpa_dev_info {
uint64_t capacity = 0;
uint8_t *byte;
 
-   if (size != sizeof(struct virtio_blk_config)) {
+   if (size < sizeof(struct virtio_blk_config)) {
DRV_LOG(ERR, "Invalid len: %u, required: %u",
size, (uint32_t)sizeof(struct virtio_blk_config));
return -1;
-- 
1.8.3.1



[PATCH] vhost: fix virtio blk vDPA live migration IO drop

2022-06-22 Thread Andy Pei
In the virtio blk vDPA live migration use case, before the live
migration process, QEMU sets the call fd to the vDPA back-end. QEMU
and the vDPA back-end stand by until live migration starts.
During the live migration process, QEMU sets the kick fd and a new call
fd. However, after the kick fd is set to the vDPA back-end, the
vDPA back-end configures the device and the data path starts. The new
call fd then causes a kind of "re-configuration", and this
"re-configuration" causes IO drops.
After this patch, the vDPA back-end configures the device only after
both the kick fd and the call fd are set, which makes sure no IO drops.
This patch only impacts virtio blk vDPA devices and does not impact
net devices.

Fixes: 7015b6577178 ("vdpa/ifc: add block device SW live-migration")

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 2b9a3b6..cc03f67 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -2946,6 +2946,7 @@ static int is_vring_iotlb(struct virtio_net *dev,
int ret;
int unlock_required = 0;
bool handled;
+   uint32_t vdpa_type = 0;
uint32_t request;
uint32_t i;
 
@@ -3152,6 +3153,20 @@ static int is_vring_iotlb(struct virtio_net *dev,
if (!vdpa_dev)
goto out;
 
+   if (vdpa_dev->ops->get_dev_type) {
+   ret = vdpa_dev->ops->get_dev_type(vdpa_dev, &vdpa_type);
+   if (ret) {
+   VHOST_LOG_CONFIG(ERR, "failed to get vdpa dev type.\n");
+   ret = -1;
+   goto out;
+   }
+   } else {
+   vdpa_type = RTE_VHOST_VDPA_DEVICE_TYPE_NET;
+   }
+   if (vdpa_type == RTE_VHOST_VDPA_DEVICE_TYPE_BLK
+   && request != VHOST_USER_SET_VRING_CALL)
+   goto out;
+
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
VHOST_LOG_CONFIG(ERR, "(%s) failed to configure vDPA device\n",
-- 
1.8.3.1



[PATCH] vdpa/ifc/base: fix null pointer dereference

2022-07-07 Thread Andy Pei
Fix null pointer dereference reported in coverity scan.
Output some log information when lm_cfg is null.
Make sure lm_cfg is not null before operating on lm_cfg.

Coverity issue: 378882
Fixes: d7fe5a2861e7 ("net/ifc: support live migration")

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c   | 31 ---
 drivers/vdpa/ifc/base/ifcvf_osdep.h |  1 +
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 0a9f71a..f1e1474 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -87,6 +87,8 @@
}
 
hw->lm_cfg = hw->mem_resource[4].addr;
+   if (!hw->lm_cfg)
+   WARNINGOUT("HW support live migration not support!\n");
 
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
@@ -218,17 +220,19 @@
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-   if (hw->device_type == IFCVF_BLK)
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   i * IFCVF_LM_CFG_SIZE) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
-   else
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE +
-   (i % 2) * 4) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
 
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -320,6 +324,8 @@
u8 *lm_cfg;
 
lm_cfg = hw->lm_cfg;
+   if (!lm_cfg)
+   return;
 
*(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) =
log_base & IFCVF_32_BIT_MASK;
@@ -342,6 +348,9 @@
u8 *lm_cfg;
 
lm_cfg = hw->lm_cfg;
+   if (!lm_cfg)
+   return;
+
*(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE;
 }
 
diff --git a/drivers/vdpa/ifc/base/ifcvf_osdep.h 
b/drivers/vdpa/ifc/base/ifcvf_osdep.h
index 6aef25e..8a47fcb 100644
--- a/drivers/vdpa/ifc/base/ifcvf_osdep.h
+++ b/drivers/vdpa/ifc/base/ifcvf_osdep.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 
+#define WARNINGOUT(S, args...)RTE_LOG(WARNING, PMD, S, ##args)
 #define DEBUGOUT(S, args...)RTE_LOG(DEBUG, PMD, S, ##args)
 #define STATIC  static
 
-- 
1.8.3.1



[PATCH v2] vdpa/ifc/base: fix null pointer dereference

2022-07-08 Thread Andy Pei
Fix null pointer dereference reported in coverity scan.
Output some log information when lm_cfg is null.
Make sure lm_cfg is not null before operating on lm_cfg.

Coverity issue: 378882
Fixes: d7fe5a2861e7 ("net/ifc: support live migration")

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c   | 31 ---
 drivers/vdpa/ifc/base/ifcvf_osdep.h |  1 +
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 0a9f71a..f1e1474 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -87,6 +87,8 @@
}
 
hw->lm_cfg = hw->mem_resource[4].addr;
+   if (!hw->lm_cfg)
+   WARNINGOUT("HW support live migration not support!\n");
 
if (hw->common_cfg == NULL || hw->notify_base == NULL ||
hw->isr == NULL || hw->dev_cfg == NULL) {
@@ -218,17 +220,19 @@
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-   if (hw->device_type == IFCVF_BLK)
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   i * IFCVF_LM_CFG_SIZE) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
-   else
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE +
-   (i % 2) * 4) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
+   if (lm_cfg) {
+   if (hw->device_type == IFCVF_BLK)
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   else
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
 
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -320,6 +324,8 @@
u8 *lm_cfg;
 
lm_cfg = hw->lm_cfg;
+   if (!lm_cfg)
+   return;
 
*(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) =
log_base & IFCVF_32_BIT_MASK;
@@ -342,6 +348,9 @@
u8 *lm_cfg;
 
lm_cfg = hw->lm_cfg;
+   if (!lm_cfg)
+   return;
+
*(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE;
 }
 
diff --git a/drivers/vdpa/ifc/base/ifcvf_osdep.h 
b/drivers/vdpa/ifc/base/ifcvf_osdep.h
index 6aef25e..8a47fcb 100644
--- a/drivers/vdpa/ifc/base/ifcvf_osdep.h
+++ b/drivers/vdpa/ifc/base/ifcvf_osdep.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 
+#define WARNINGOUT(S, args...)RTE_LOG(WARNING, PMD, S, ##args)
 #define DEBUGOUT(S, args...)RTE_LOG(DEBUG, PMD, S, ##args)
 #define STATIC  static
 
-- 
1.8.3.1



[PATCH] vdpa/ifc: fix log info mismatch

2022-01-13 Thread Andy Pei
Fix log info mismatch.

Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver")
Cc: sta...@dpdk.org

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..d10c1fd 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -94,12 +94,14 @@
return -1;
}
 
-   DEBUGOUT("capability mapping:\ncommon cfg: %p\n"
-   "notify base: %p\nisr cfg: %p\ndevice cfg: %p\n"
-   "multiplier: %u\n",
-   hw->common_cfg, hw->dev_cfg,
-   hw->isr, hw->notify_base,
-   hw->notify_off_multiplier);
+   DEBUGOUT("capability mapping:\n"
+"common cfg: %p\n"
+"notify base: %p\n"
+"isr cfg: %p\n"
+"device cfg: %p\n"
+"multiplier: %u\n",
+hw->common_cfg, hw->notify_base, hw->isr, hw->dev_cfg,
+hw->notify_off_multiplier);
 
return 0;
 }
-- 
1.8.3.1



[PATCH] vhost: add some log for vhost message VHOST_USER_SET_VRING_BASE

2022-01-13 Thread Andy Pei
Usually the last avail index and last used index are 0, but for the target
device of a live migration, the last avail index and last used index are
not 0, so logging them is helpful.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346..3cb13fb 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -973,6 +973,11 @@
vq->last_avail_idx = msg->payload.state.num;
}
 
+   VHOST_LOG_CONFIG(INFO,
+   "vring base idx:%d last_used_idx:%u last_avail_idx:%u.\n",
+   msg->payload.state.index, vq->last_used_idx,
+   vq->last_avail_idx);
+
return RTE_VHOST_MSG_RESULT_OK;
 }
 
-- 
1.8.3.1



[PATCH v2] vhost: add log for VHOST_USER_SET_VRING_BASE

2022-01-14 Thread Andy Pei
This patch adds a log of vring-related info when handling the vhost message
VHOST_USER_SET_VRING_BASE, which is useful in the live migration case.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index a781346..cd8c7bc 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -973,6 +973,11 @@
vq->last_avail_idx = msg->payload.state.num;
}
 
+   VHOST_LOG_CONFIG(INFO,
+   "vring base idx:%u last_used_idx:%u last_avail_idx:%u.\n",
+   msg->payload.state.index, vq->last_used_idx,
+   vq->last_avail_idx);
+
return RTE_VHOST_MSG_RESULT_OK;
 }
 
-- 
1.8.3.1



[PATCH 00/15] add virtio_blk device support to vdpa/ifc

2022-01-24 Thread Andy Pei
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc
driver is re-used.
The virtio net and blk devices are distinguished by device ID, and each
implements its specific features and ops.
An example is added to the vdpa example to support virtio_blk devices.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only on a VHOST_USER_SET_VRING_CALL message.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c|  42 ++-
 drivers/vdpa/ifc/base/ifcvf.h|  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c| 534 ---
 examples/vdpa/Makefile   |   2 +-
 examples/vdpa/main.c |   8 +
 examples/vdpa/meson.build|   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++
 examples/vdpa/vdpa_blk_compact.h | 118 +
 examples/vdpa/vhost_user.h   | 190 ++
 lib/vhost/vdpa_driver.h  |   8 +-
 lib/vhost/vhost_user.c   |  15 ++
 usertools/dpdk-devbind.py|   8 +
 12 files changed, 1053 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1



[PATCH 01/15] vdpa/ifc: add support for virtio blk device

2022-01-24 Thread Andy Pei
Re-use the vdpa/ifc code, distinguishing blk and net devices by pci_device_id.
Blk and net devices are implemented with their proper features and ops.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 96 +++
 2 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include 
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET  0
+#define IFCVF_BLK  1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_BLK_DEVICE_ID 0x0002
+
 #define IFCVF_VENDOR_ID0x1AF4
 #define IFCVF_DEVICE_ID0x1041
 #define IFCVF_SUBSYS_VENDOR_ID 0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
-
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
u8 notify_region;
u32notify_off_multiplier;
struct ifcvf_pci_common_cfg *common_cfg;
-   struct ifcvf_net_config *dev_cfg;
+   union {
+   struct ifcvf_net_config *net_cfg;
+   struct virtio_blk_config *blk_cfg;
+   void *dev_cfg;
+   };
u8 *isr;
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..48056d1 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
struct rte_vdpa_device *vdev;
uint16_t max_queues;
uint64_t features;
+   int device_type;
rte_atomic32_t started;
rte_atomic32_t dev_attached;
rte_atomic32_t running;
@@ -75,6 +76,14 @@ struct internal_list {
struct ifcvf_internal *internal;
 };
 
+/**
+** vdpa device info includes device features and device operations.
+**/
+struct rte_vdpa_dev_info {
+   uint64_t features;
+   struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1179,50 @@ struct internal_list {
return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+   uint16_t pci_device_id = pci_dev->id.device_id;
+   uint16_t device_id;
+
+   if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+   DRV_LOG(ERR, "Probe device is not a virtio device\n");
+   return -1;
+   }
+
+   if (pci_device_id < 0x1040) {
+   /**
+   ** Transitional devices: use the PCI subsystem device id as
+   ** virtio device id, same as legacy driver always did.
+   **/
+   device_id = pci_dev->id.subsystem_device_id;
+   } else {
+   /**
+   ** Modern devices: simply use PCI device id,
+   ** but start from 0x1040.
+   **/
+   device_id = pci_device_id - 0x1040;
+   }
+
+   return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+   {
+   .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+   (1ULL << VIRTIO_NET_F_STATUS) |
+   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = &ifcvf_ops,
+   },
+   {
+   .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = NULL,
+   },
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
@@ -1181,6 +1234,7 @@ struct internal_list {
int sw_fallback_lm = 0;
struct rte_kvargs *kvlist = NULL;
int ret = 0;
+   int16_t device_id;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1230,13 +1284,24 @@ struct internal_list {
internal->configured = 0;
internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
-   internal->features = (features &
-   ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-   (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-   (1ULL << VIRTIO_NET_F_STATUS

[PATCH 02/15] vhost: add vdpa ops for blk device

2022-01-24 Thread Andy Pei
Get_config and set_config are necessary ops for blk device.
Add get_config and set_config ops to vdpa ops.

Signed-off-by: Andy Pei 
---
 lib/vhost/vdpa_driver.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
/** Reset statistics of the queue */
int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-   /** Reserved for future extension */
-   void *reserved[2];
+   /** Get the device configuration space */
+   int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+   /** Set the device configuration space */
+   int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+ uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1



[PATCH 03/15] vdpa/ifc: add blk ops for ifc device

2022-01-24 Thread Andy Pei
For virtio blk device, re-use part of ifc driver ops.
Implement ifcvf_blk_get_config for virtio blk device.
Support VHOST_USER_PROTOCOL_F_CONFIG feature for virtio
blk device.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 88 ++-
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 48056d1..965baa2 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1093,6 +1093,10 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+   (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1207,6 +1211,88 @@ struct rte_vdpa_dev_info {
return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+   struct virtio_blk_config *dev_cfg;
+   struct ifcvf_internal *internal;
+   struct rte_vdpa_device *vdev;
+   struct internal_list *list;
+   uint32_t i;
+   __u64 capacity = 0;
+   uint8_t *byte;
+
+   if (len < sizeof(struct virtio_blk_config)) {
+   DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+   len, sizeof(struct virtio_blk_config));
+   return -1;
+   }
+
+   vdev = rte_vhost_get_vdpa_device(vid);
+   list = find_internal_resource_by_vdev(vdev);
+   if (list == NULL) {
+   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   return -1;
+   }
+
+   internal = list->internal;
+
+   for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+   config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+   dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+   /**
+   ** cannot read 64-bit register in one attempt,
+   ** so read byte by byte.
+   **/
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (__u64)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "  cylinders: %u", dev_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "  heads: %u", dev_cfg->geometry.heads);
+   DRV_LOG(INFO, "  sectors  : %u", dev_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+   DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+   config[0], config[1], config[2], config[3], config[4],
+   config[5], config[6], config[7]);
+   return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+   uint64_t *features)
+{
+   RTE_SET_USED(vdev);
+
+   *features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+   *features |= VDPA_BLK_PROTOCOL_FEATURES;
+   return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+   .get_queue_num = ifcvf_get_queue_num,
+   .get_features = ifcvf_get_vdpa_features,
+   .set_features = ifcvf_set_features,
+   .get_protocol_features = ifcvf_blk_get_protocol_features,
+   .dev_conf = ifcvf_dev_config,
+   .dev_close = ifcvf_dev_close,
+   .set_vring_state = NULL,
+   .migration_done = NULL,
+   .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+   .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+   .get_notify_area = ifcvf_get_notify_area,
+   .get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1219,7 +1305,7 @@ struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
(1ULL << VHOST_F_LOG_ALL),
-   .ops = NULL,
+   .ops = &ifcvf_blk_ops,
},
 };
 
-- 
1.8.3.1



[PATCH 04/15] vdpa/ifc: add vdpa interrupt for blk device

2022-01-24 Thread Andy Pei
For the blk we need to relay all the cmd of each queue.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 48 +--
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 965baa2..9729490 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -374,24 +374,50 @@ struct rte_vdpa_dev_info {
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *)&irq_set->data;
+   /* The first interrupt is for the configure space change notification */
fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
rte_intr_fd_get(internal->pdev->intr_handle);
 
for (i = 0; i < nr_vring; i++)
internal->intr_fd[i] = -1;
 
-   for (i = 0; i < nr_vring; i++) {
-   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-   if ((i & 1) == 0 && m_rx == true) {
-   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-   if (fd < 0) {
-   DRV_LOG(ERR, "can't setup eventfd: %s",
-   strerror(errno));
-   return -1;
+   if (internal->device_type == IFCVF_NET) {
+   for (i = 0; i < nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+   if ((i & 1) == 0 && m_rx == true) {
+   /**
+   ** For the net we only need to relay rx queue,
+   ** which will change the mem of VM.
+   **/
+   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+   if (fd < 0) {
+   DRV_LOG(ERR, "can't setup eventfd: %s",
+   strerror(errno));
+   return -1;
+   }
+   internal->intr_fd[i] = fd;
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+   }
+   }
+   } else if (internal->device_type == IFCVF_BLK) {
+   for (i = 0; i < nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+   if (m_rx == true) {
+   /**
+   ** For the blk we need to relay all the read cmd
+   ** of each queue
+   **/
+   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+   if (fd < 0) {
+   DRV_LOG(ERR, "can't setup eventfd: %s",
+   strerror(errno));
+   return -1;
+   }
+   internal->intr_fd[i] = fd;
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
}
-   internal->intr_fd[i] = fd;
-   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
}
}
 
-- 
1.8.3.1



[PATCH 05/15] vdpa/ifc: add blk dev sw live migration

2022-01-24 Thread Andy Pei
Enable virtio blk SW live migration: relay the callfd and log the dirty pages.
In this version we ignore the write cmd and still mark it dirty. Maybe we can
improve it later.

Signed-off-by: Jin Yu 
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 130 +++---
 3 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 9729490..1f832a3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -334,10 +334,68 @@ struct rte_vdpa_dev_info {
 
rte_vhost_get_negotiated_features(vid, &features);
if (RTE_VHOST_NEED_LOG(features)) {
-   ifcvf_disable_logging(hw);
-   rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-   rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-   log_base, IFCVF_LOG_BASE, log_size);
+   if (internal->device_type == IFCVF_NET) {
+   ifcvf_disable_logging(hw);
+   rte_vhost_get_log_base(internal->vid, &log_base,
+   &log_size);
+   rte_vfio_container_dma_unmap(
+   internal->vfio_container_fd, log_base,
+   IFCVF_LOG_BASE, log_size);
+   }
+   /**
+   ** IFCVF marks dirty memory pages for only packet buffer,
+   ** SW helps to mark the used ring as dirty after device stops.
+   **/
+   for (i = 0; i < hw->nr_vring; i++) {
+   len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+   rte_vhost_log_used_vring(vid, i, 0, len);
+   }
+   }
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+   struct ifcvf_hw *hw = &internal->hw;
+   struct rte_vhost_vring vq;
+   int i, vid;
+   uint64_t features = 0;
+   uint64_t log_base = 0, log_size = 0;
+   uint64_t len;
+
+   vid = internal->vid;
+
+   if (internal->device_type == IFCVF_BLK) {
+   for (i = 0; i < hw->nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+   while (vq.avail->idx != vq.used->idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   hw->vring[i].last_avail_idx = vq.avail->idx;
+   hw->vring[i].last_used_idx = vq.used->idx;
+   }
+   }
+
+   ifcvf_hw_disable(hw);
+
+   for (i = 0; i < hw->nr_vring; i++)
+   rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+   hw->vring[i].last_used_idx);
+
+   if (internal->sw_lm)
+   return;
+
+   rte_vhost_get_negotiated_features(vid, &features);
+   if (RTE_VHOST_NEED_LOG(features)) {
+   if (internal->device_type == IFCVF_NET) {
+   ifcvf_disable_logging(hw);
+   rte_vhost_get_log_base(internal->vid, &log_base,
+   &log_size);
+   rte_vfio_container_dma_unmap(
+   internal->vfio_container_fd, log_base,
+   IFCVF_LOG_BASE, log_size);
+   }
/*
 * IFCVF marks dirty memory pages for only packet buffer,
 * SW helps to mark the used ring as dirty after device stops.
@@ -665,15 +723,18 @@ struct rte_vdpa_dev_info {
}
hw->vring[i].avail = gpa;
 
-   /* Direct I/O for Tx queue, relay for Rx queue */
-   if (i & 1) {
+   /**
+   ** NETWORK: Direct I/O for Tx queue, relay for 

[PATCH 06/15] example/vdpa:add vdpa blk support in example

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 examples/vdpa/Makefile   |   2 +-
 examples/vdpa/main.c |   8 ++
 examples/vdpa/meson.build|   1 +
 examples/vdpa/vdpa_blk_compact.c | 152 +++
 examples/vdpa/vdpa_blk_compact.h | 118 
 examples/vdpa/vhost_user.h   | 190 +++
 6 files changed, 470 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..3fa3d3a 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
.new_device = new_device,
.destroy_device = destroy_device,
+   .new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
"attach vdpa device failed: %s\n",
socket_path);
 
+   if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+   < 0)
+   rte_exit(EXIT_FAILURE,
+   "set vhost blk driver features and protocal features 
failed: %s\n",
+   socket_path);
+
if (rte_vhost_driver_start(socket_path) < 0)
rte_exit(EXIT_FAILURE,
"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
 'main.c',
+   'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 000..7310ebb
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,152 @@
+/*
+**INTEL CONFIDENTIAL
+**
+**Copyright (c) Intel Corporation.
+**All rights reserved.
+**
+**The source code contained or described herein and all documents related
+**to the source code ("Material") are owned by Intel Corporation or its
+**suppliers or licensors.  Title to the Material remains with Intel
+**Corporation or its suppliers and licensors.  The Material contains trade
+**secrets and proprietary and confidential information of Intel or its
+**suppliers and licensors.  The Material is protected by worldwide
+**copyright and trade secret laws and treaty provisions.  No part of the
+**Material may be used, copied, reproduced, modified, published, uploaded,
+**posted, transmitted, distributed, or disclosed in any way without Intel's
+**prior express written permission.
+**
+**No license under any patent, copyright, trade secret or other
+**intellectual property right is granted to or conferred upon you by
+**disclosure or delivery of the Materials, either expressly, by
+**implication, inducement, estoppel or otherwise.  Any license under such
+**intellectual property rights must be express and approved by Intel in
+**writing.
+*/
+
+/**
+** @file
+**
+** Block device specific vhost lib
+**/
+
+#include 
+
+#include 
+#include 
+#include 
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG  24
+#define VHOST_USER_SET_CONFIG  25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Function to handle vhost user blk message
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+   struct VhostUserMsg *msg = _msg;
+   struct rte_vdpa_device *vdev = NULL;
+
+   vdev = rte_vhost_get_vdpa_device(vid);
+   if (vdev == NULL)
+   return RTE_VHOST_MSG_RESULT_ERR;
+
+   fprintf(stderr, "msg is %d\n", msg->request.master);
+   switch (msg->request.master) {
+   case VHOST_USER_GET_CONFIG: {
+   int rc = 0;
+
+   fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+   if (vdev->ops->get_config) {
+   fprintf(stdout, "get_config() fun

[PATCH 07/15] usertools: add support for virtio blk device

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 usertools/dpdk-devbind.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
 Displays to the user what devices are bound to the igb_uio driver, the
 kernel driver or to no driver'''
 
+if status_dev in ["virtio_blk", "all"]:
+show_device_status(virtio_blk_devices, "virtio_blk")
+
 if status_dev in ["net", "all"]:
 show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
 if b_flag is not None:
 clear_data()
 # refresh if we have changed anything
+get_device_details(virtio_blk_devices)
 get_device_details(network_devices)
 get_device_details(baseband_devices)
 get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
 parse_args()
 check_modules()
 clear_data()
+get_device_details(virtio_blk_devices)
 get_device_details(network_devices)
 get_device_details(baseband_devices)
 get_device_details(crypto_devices)
-- 
1.8.3.1



[PATCH 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 1f832a3..eff6ff3 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1384,6 +1384,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+   RTE_SET_USED(vid);
+   RTE_SET_USED(vring);
+   RTE_SET_USED(state);
+
+   return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
uint64_t *features)
 {
@@ -1401,7 +1411,7 @@ struct rte_vdpa_dev_info {
.get_protocol_features = ifcvf_blk_get_protocol_features,
.dev_conf = ifcvf_dev_config,
.dev_close = ifcvf_dev_close,
-   .set_vring_state = NULL,
+   .set_vring_state = ifcvf_blk_set_vring_state,
.migration_done = NULL,
.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1



[PATCH 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index eff6ff3..0b4b77f 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1447,6 +1447,9 @@ struct rte_vdpa_dev_info dev_info[] = {
struct rte_kvargs *kvlist = NULL;
int ret = 0;
int16_t device_id;
+   __u64 capacity = 0;
+   uint8_t *byte;
+   uint32_t i;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1513,6 +1516,32 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
+
+   /**
+   ** cannot read 64-bit register in one attempt,
+   ** so read byte by byte.
+   **/
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (__u64)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x",
+   internal->hw.blk_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x",
+   internal->hw.blk_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x",
+   internal->hw.blk_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "cylinders: %u",
+   internal->hw.blk_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "heads: %u",
+   internal->hw.blk_cfg->geometry.heads);
+   DRV_LOG(INFO, "sectors  : %u",
+   internal->hw.blk_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x",
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH 10/15] vdpa/ifc: read virtio max_queues from hardware

2022-01-24 Thread Andy Pei
In the original code, max_queues is set to IFCVF_MAX_QUEUES.
With this change, max_queues is the minimum of IFCVF_MAX_QUEUES and the
hardware num_queues.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 0b4b77f..f092aca 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1542,6 +1542,10 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(INFO, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   /* reset max_queue here, to minimum modification */
+   internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH 11/15] vdpa: add config space change interrupt register and handle for virtio_blk

2022-01-24 Thread Andy Pei
Create a thread to poll for and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 113 ++
 1 file changed, 113 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index f092aca..2552375 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
int vfio_group_fd;
int vfio_dev_fd;
pthread_t tid;  /* thread for notify relay */
+   pthread_t intr_tid; /* thread for intr relay */
int epfd;
+   int csc_fd;
int vid;
struct rte_vdpa_device *vdev;
uint16_t max_queues;
@@ -622,6 +624,108 @@ struct rte_vdpa_dev_info {
return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+   int vid = internal->vid;
+   int ret;
+
+   ret = rte_vhost_slave_config_change(vid, 1);
+   if (ret)
+   DRV_LOG(ERR, "failed to notify the guest about configuration 
space change.");
+
+   return;
+}
+
+static void *
+intr_relay(void *arg)
+{
+   struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+   struct epoll_event csc_event;
+   struct epoll_event ev;
+   uint64_t buf;
+   int nbytes;
+   int csc_fd, csc_val = 0;
+
+   csc_fd = epoll_create(1);
+   if (csc_fd < 0) {
+   DRV_LOG(ERR, "failed to create epoll for config space change.");
+   return NULL;
+   }
+
+   ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+   ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+   if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+   rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+   DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+   return NULL;
+   }
+
+   internal->csc_fd = csc_fd;
+
+   for (;;) {
+   csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+   if (csc_val < 0) {
+   if (errno == EINTR)
+   continue;
+   DRV_LOG(ERR, "epoll_wait return fail\n");
+   return NULL;
+   } else if (csc_val == 0) {
+   continue;
+   } else {
+   /* csc_val > 0 */
+   nbytes = read(csc_event.data.fd, &buf, 8);
+   if (nbytes < 0) {
+   if (errno == EINTR || errno == EWOULDBLOCK)
+   continue;
+   DRV_LOG(ERR, "Error reading from file 
descriptor %d: %s\n",
+   csc_event.data.fd,
+   strerror(errno));
+   return NULL;
+   } else if (nbytes == 0) {
+   DRV_LOG(ERR, "Read nothing from file descriptor 
%d\n",
+   csc_event.data.fd);
+   continue;
+   } else {
+   virtio_interrupt_handler(internal);
+   }
+   }
+   }
+   return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+   int ret;
+
+   ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+   (void *)internal);
+   if (ret) {
+   DRV_LOG(ERR, "failed to create notify relay pthread.");
+   return -1;
+   }
+   return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+   void *status;
+
+   if (internal->intr_tid) {
+   pthread_cancel(internal->intr_tid);
+   pthread_join(internal->intr_tid, &status);
+   }
+   internal->intr_tid = 0;
+
+   if (internal->csc_fd >= 0)
+   close(internal->csc_fd);
+   internal->csc_fd = -1;
+
+   return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -648,10 +752,16 @@ struct rte_vdpa_dev_info {
if (ret)
goto err;
 
+   ret = setup_intr_relay(internal);
+   if (ret)
+   goto err;
+
rte_atomic32_set(&internal->running, 1);
} else if (rte_atomic32_read(&internal->running) &&
   (!rte_atomic32_read(&internal->started) ||
!rte_atomic32_read(&internal->dev_attached))) {
+   ret = unset_intr_relay(internal);
+
ret = unset_notify_relay(internal);
if (ret)
goto

[PATCH 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
u8 *lm_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
+   u8 is_blk;
struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 2552375..546f9bd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1621,11 +1621,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->device_type = IFCVF_NET;
+   internal->hw.is_blk = IFCVF_NET;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
} else if (device_id == VIRTIO_ID_BLOCK) {
internal->device_type = IFCVF_BLK;
+   internal->hw.is_blk = IFCVF_BLK;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1



[PATCH 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device

2022-01-24 Thread Andy Pei
1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
+   if (hw->is_blk == IFCVF_BLK) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   } else if (hw->is_blk == IFCVF_NET) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
 
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
IFCVF_WRITE_REG16(i, &cfg->queue_select);
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-   ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+   if (hw->is_blk) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   } else if (hw->is_blk == IFCVF_NET) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   }
+
+   if (hw->is_blk == IFCVF_BLK)
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   else if (hw->is_blk == IFCVF_NET)
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING   0x2000
 
 #define IFCVF_32_BIT_MASK  0x
+#define IFCVF_16_BIT_MASK  0x
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG   9
-- 
1.8.3.1



[PATCH 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause

2022-01-24 Thread Andy Pei
Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++-
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 546f9bd..ff233bc 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -359,23 +359,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
struct ifcvf_hw *hw = &internal->hw;
-   struct rte_vhost_vring vq;
int i, vid;
uint64_t features = 0;
uint64_t log_base = 0, log_size = 0;
uint64_t len;
+   u32 ring_state = 0;
 
vid = internal->vid;
 
if (internal->device_type == IFCVF_BLK) {
for (i = 0; i < hw->nr_vring; i++) {
-   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-   while (vq.avail->idx != vq.used->idx) {
-   ifcvf_notify_queue(hw, i);
-   usleep(10);
-   }
-   hw->vring[i].last_avail_idx = vq.avail->idx;
-   hw->vring[i].last_used_idx = vq.used->idx;
+   do {
+   if (hw->lm_cfg != NULL)
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   hw->vring[i].last_used_idx =
+   (u16)(ring_state >> 16);
+   if (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   } while (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx);
}
}
 
@@ -766,7 +775,12 @@ struct rte_vdpa_dev_info {
if (ret)
goto err;
 
-   vdpa_ifcvf_stop(internal);
+   if (internal->device_type == IFCVF_BLK) {
+   vdpa_ifcvf_blk_pause(internal);
+   ifcvf_reset(&internal->hw);
+   } else {
+   vdpa_ifcvf_stop(internal);
+   }
 
ret = vdpa_disable_vfio_intr(internal);
if (ret)
-- 
1.8.3.1



[PATCH 15/15] vhost: make sure each queue callfd is configured

2022-01-24 Thread Andy Pei
During the vhost data path building process, QEMU creates a call fd at
first and creates another call fd at the end. The final call fd is the one
used to relay notifications.
In the original code, dev_conf is called after the kick fd is set, so it
uses the first call fd. Even though the actual call fd is set later, the
data path does not work correctly.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..0be879a 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,27 @@ typedef int (*vhost_message_handler_t)(struct 
virtio_net **pdev,
if (!vdpa_dev)
goto out;
 
+   if (request != VHOST_USER_SET_VRING_CALL)
+   goto out;
+
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
VHOST_LOG_CONFIG(ERR,
 "Failed to configure vDPA device\n");
else
dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+   } else {
+   /**
+   ** when VIRTIO_DEV_VDPA_CONFIGURED already configured
+   ** close the device and config the device again,
+   ** make sure the call fd of each queue is configed correctly.
+   **/
+   if (vdpa_dev->ops->dev_close(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to close vDPA device\n");
+   if (vdpa_dev->ops->dev_conf(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to re-config vDPA device\n");
}
 
 out:
-- 
1.8.3.1



[PATCH v2 00/15] add virtio_blk device support to vdpa/ifc

2022-01-25 Thread Andy Pei
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc
driver is re-used.
The virtio net and blk devices are distinguished by device ID, and specific
features and ops are implemented for each.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the
vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.

v2:
 Fix some coding style issue.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the
same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c|  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h|  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c| 520 ---
 examples/vdpa/Makefile   |   2 +-
 examples/vdpa/main.c |   8 +
 examples/vdpa/meson.build|   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++
 examples/vdpa/vdpa_blk_compact.h | 117 +
 examples/vdpa/vhost_user.h   | 189 ++
 lib/vhost/vdpa_driver.h  |   8 +-
 lib/vhost/vhost_user.c   |  14 ++
 usertools/dpdk-devbind.py|   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1



[PATCH v2 01/15] vdpa/ifc: add support for virtio blk device

2022-01-25 Thread Andy Pei
Re-use the vdpa/ifc code and distinguish blk and net devices by pci_device_id.
Blk and net devices are each implemented with their proper features and ops.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h | 16 +++-
 drivers/vdpa/ifc/ifcvf_vdpa.c | 92 +++
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 573a35f..01522c6 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -5,8 +5,17 @@
 #ifndef _IFCVF_H_
 #define _IFCVF_H_
 
+#include 
 #include "ifcvf_osdep.h"
 
+#define IFCVF_NET  0
+#define IFCVF_BLK  1
+
+/* for BLK */
+#define IFCVF_BLK_TRANSITIONAL_DEVICE_ID0x1001
+#define IFCVF_BLK_MODERN_DEVICE_ID  0x1042
+#define IFCVF_BLK_DEVICE_ID 0x0002
+
 #define IFCVF_VENDOR_ID0x1AF4
 #define IFCVF_DEVICE_ID0x1041
 #define IFCVF_SUBSYS_VENDOR_ID 0x8086
@@ -57,7 +66,6 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
-
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
@@ -126,7 +134,11 @@ struct ifcvf_hw {
u8 notify_region;
u32notify_off_multiplier;
struct ifcvf_pci_common_cfg *common_cfg;
-   struct ifcvf_net_config *dev_cfg;
+   union {
+   struct ifcvf_net_config *net_cfg;
+   struct virtio_blk_config *blk_cfg;
+   void *dev_cfg;
+   };
u8 *isr;
u16*notify_base;
u16*notify_addr[IFCVF_MAX_QUEUES * 2];
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 3853c4c..96b67dd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -58,6 +58,7 @@ struct ifcvf_internal {
struct rte_vdpa_device *vdev;
uint16_t max_queues;
uint64_t features;
+   int device_type;
rte_atomic32_t started;
rte_atomic32_t dev_attached;
rte_atomic32_t running;
@@ -75,6 +76,12 @@ struct internal_list {
struct ifcvf_internal *internal;
 };
 
+/* vdpa device info includes device features and devcic operation. */
+struct rte_vdpa_dev_info {
+   uint64_t features;
+   struct rte_vdpa_dev_ops *ops;
+};
+
 TAILQ_HEAD(internal_list_head, internal_list);
 static struct internal_list_head internal_list =
TAILQ_HEAD_INITIALIZER(internal_list);
@@ -1170,6 +1177,48 @@ struct internal_list {
return 0;
 }
 
+static int16_t
+ifcvf_pci_get_device_type(struct rte_pci_device *pci_dev)
+{
+   uint16_t pci_device_id = pci_dev->id.device_id;
+   uint16_t device_id;
+
+   if (pci_device_id < 0x1000 || pci_device_id > 0x107f) {
+   DRV_LOG(ERR, "Probe device is not a virtio device\n");
+   return -1;
+   }
+
+   if (pci_device_id < 0x1040) {
+   /* Transitional devices: use the PCI subsystem device id as
+* virtio device id, same as legacy driver always did.
+*/
+   device_id = pci_dev->id.subsystem_device_id;
+   } else {
+   /* Modern devices: simply use PCI device id,
+* but start from 0x1040.
+*/
+   device_id = pci_device_id - 0x1040;
+   }
+
+   return device_id;
+}
+
+struct rte_vdpa_dev_info dev_info[] = {
+   {
+   .features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+   (1ULL << VIRTIO_NET_F_STATUS) |
+   (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = &ifcvf_ops,
+   },
+   {
+   .features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+   (1ULL << VHOST_F_LOG_ALL),
+   .ops = NULL,
+   },
+};
+
 static int
 ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
@@ -1181,6 +1230,7 @@ struct internal_list {
int sw_fallback_lm = 0;
struct rte_kvargs *kvlist = NULL;
int ret = 0;
+   int16_t device_id;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1230,13 +1280,24 @@ struct internal_list {
internal->configured = 0;
internal->max_queues = IFCVF_MAX_QUEUES;
features = ifcvf_get_features(&internal->hw);
-   internal->features = (features &
-   ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-   (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
-   (1ULL << VIRTIO_NET_F_CTRL_VQ) |
-   (1ULL << VIRTIO_NET_F_STATUS) |
-   (1ULL << VHOST_USER_F_P

[PATCH v2 02/15] vhost: add vdpa ops for blk device

2022-01-25 Thread Andy Pei
get_config and set_config are necessary ops for a blk device.
Add the get_config and set_config ops to the vdpa ops.

Signed-off-by: Andy Pei 
---
 lib/vhost/vdpa_driver.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/vhost/vdpa_driver.h b/lib/vhost/vdpa_driver.h
index fc2d6ac..9a23db9 100644
--- a/lib/vhost/vdpa_driver.h
+++ b/lib/vhost/vdpa_driver.h
@@ -65,8 +65,12 @@ struct rte_vdpa_dev_ops {
/** Reset statistics of the queue */
int (*reset_stats)(struct rte_vdpa_device *dev, int qid);
 
-   /** Reserved for future extension */
-   void *reserved[2];
+   /** Get the device configuration space */
+   int (*get_config)(int vid, uint8_t *config, uint32_t len);
+
+   /** Set the device configuration space */
+   int (*set_config)(int vid, uint8_t *config, uint32_t offset,
+ uint32_t size, uint32_t flags);
 };
 
 /**
-- 
1.8.3.1



[PATCH v2 03/15] vdpa/ifc: add blk ops for ifc device

2022-01-25 Thread Andy Pei
For the virtio blk device, re-use part of the ifc driver ops.
Implement ifcvf_blk_get_config for the virtio blk device.
Support the VHOST_USER_PROTOCOL_F_CONFIG feature for the virtio blk device.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h |  4 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 85 ++-
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 01522c6..769c603 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -66,6 +66,10 @@
 
 #define IFCVF_32_BIT_MASK  0x
 
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
 struct ifcvf_pci_cap {
u8 cap_vndr;/* Generic PCI field: PCI_CAP_ID_VNDR */
u8 cap_next;/* Generic PCI field: next ptr. */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 96b67dd..57fdd2c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1091,6 +1091,10 @@ struct rte_vdpa_dev_info {
 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | \
 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | \
 1ULL << VHOST_USER_PROTOCOL_F_STATUS)
+
+#define VDPA_BLK_PROTOCOL_FEATURES \
+   (1ULL << VHOST_USER_PROTOCOL_F_CONFIG)
+
 static int
 ifcvf_get_protocol_features(struct rte_vdpa_device *vdev, uint64_t *features)
 {
@@ -1203,6 +1207,85 @@ struct rte_vdpa_dev_info {
return device_id;
 }
 
+static int
+ifcvf_blk_get_config(int vid, uint8_t *config, uint32_t len)
+{
+   struct virtio_blk_config *dev_cfg;
+   struct ifcvf_internal *internal;
+   struct rte_vdpa_device *vdev;
+   struct internal_list *list;
+   uint32_t i;
+   __u64 capacity = 0;
+   uint8_t *byte;
+
+   if (len < sizeof(struct virtio_blk_config)) {
+   DRV_LOG(ERR, "Invalid len: %u, required: %lu",
+   len, sizeof(struct virtio_blk_config));
+   return -1;
+   }
+
+   vdev = rte_vhost_get_vdpa_device(vid);
+   list = find_internal_resource_by_vdev(vdev);
+   if (list == NULL) {
+   DRV_LOG(ERR, "Invalid vDPA device: %p", vdev);
+   return -1;
+   }
+
+   internal = list->internal;
+
+   for (i = 0; i < sizeof(struct virtio_blk_config); i++)
+   config[i] = *((u8 *)internal->hw.blk_cfg + i);
+
+   dev_cfg = (struct virtio_blk_config *)internal->hw.blk_cfg;
+
+   /* cannot read 64-bit register in one attempt, so read byte by byte. */
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (__u64)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x", dev_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x", dev_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x", dev_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "  cylinders: %u", dev_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "  heads: %u", dev_cfg->geometry.heads);
+   DRV_LOG(INFO, "  sectors  : %u", dev_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x", dev_cfg->num_queues);
+
+   DRV_LOG(INFO, "config: [%x] [%x] [%x] [%x] [%x] [%x] [%x] [%x]\n",
+   config[0], config[1], config[2], config[3], config[4],
+   config[5], config[6], config[7]);
+   return 0;
+}
+
+static int
+ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
+   uint64_t *features)
+{
+   RTE_SET_USED(vdev);
+
+   *features = VDPA_SUPPORTED_PROTOCOL_FEATURES;
+   *features |= VDPA_BLK_PROTOCOL_FEATURES;
+   return 0;
+}
+
+static struct rte_vdpa_dev_ops ifcvf_blk_ops = {
+   .get_queue_num = ifcvf_get_queue_num,
+   .get_features = ifcvf_get_vdpa_features,
+   .set_features = ifcvf_set_features,
+   .get_protocol_features = ifcvf_blk_get_protocol_features,
+   .dev_conf = ifcvf_dev_config,
+   .dev_close = ifcvf_dev_close,
+   .set_vring_state = NULL,
+   .migration_done = NULL,
+   .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
+   .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
+   .get_notify_area = ifcvf_get_notify_area,
+   .get_config = ifcvf_blk_get_config,
+};
+
 struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
@@ -1215,7 +1298,7 @@ struct rte_vdpa_dev_info dev_info[] = {
{
.features = (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
(1ULL << VHOST_F_LOG_ALL),
-   .ops = NULL,
+   .ops = &ifcvf_blk_ops,
},
 };
 
-- 
1.8.3.1



[PATCH v2 04/15] vdpa/ifc: add vdpa interrupt for blk device

2022-01-25 Thread Andy Pei
For the blk device, we need to relay all the commands of each queue.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 46 ---
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 57fdd2c..ef5b36c 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -372,24 +372,48 @@ struct rte_vdpa_dev_info {
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *)&irq_set->data;
+   /* The first interrupt is for the configure space change notification */
fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] =
rte_intr_fd_get(internal->pdev->intr_handle);
 
for (i = 0; i < nr_vring; i++)
internal->intr_fd[i] = -1;
 
-   for (i = 0; i < nr_vring; i++) {
-   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
-   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
-   if ((i & 1) == 0 && m_rx == true) {
-   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-   if (fd < 0) {
-   DRV_LOG(ERR, "can't setup eventfd: %s",
-   strerror(errno));
-   return -1;
+   if (internal->device_type == IFCVF_NET) {
+   for (i = 0; i < nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+   if ((i & 1) == 0 && m_rx == true) {
+   /* For the net we only need to relay rx queue,
+* which will change the mem of VM.
+*/
+   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+   if (fd < 0) {
+   DRV_LOG(ERR, "can't setup eventfd: %s",
+   strerror(errno));
+   return -1;
+   }
+   internal->intr_fd[i] = fd;
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
+   }
+   }
+   } else if (internal->device_type == IFCVF_BLK) {
+   for (i = 0; i < nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vring);
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+   if (m_rx == true) {
+   /* For the blk we need to relay all the read cmd
+* of each queue
+*/
+   fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+   if (fd < 0) {
+   DRV_LOG(ERR, "can't setup eventfd: %s",
+   strerror(errno));
+   return -1;
+   }
+   internal->intr_fd[i] = fd;
+   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
}
-   internal->intr_fd[i] = fd;
-   fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd;
}
}
 
-- 
1.8.3.1



[PATCH v2 05/15] vdpa/ifc: add blk dev sw live migration

2022-01-25 Thread Andy Pei
Enable virtio blk software live migration: relay the callfd and log the
dirty pages.
In this version, write commands are not handled separately; they are still
marked dirty.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c |   4 +-
 drivers/vdpa/ifc/base/ifcvf.h |   6 ++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 128 +++---
 3 files changed, 116 insertions(+), 22 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 721cb1d..3a69e53 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -189,7 +189,7 @@
IFCVF_WRITE_REG32(val >> 32, hi);
 }
 
-STATIC int
+int
 ifcvf_hw_enable(struct ifcvf_hw *hw)
 {
struct ifcvf_pci_common_cfg *cfg;
@@ -238,7 +238,7 @@
return 0;
 }
 
-STATIC void
+void
 ifcvf_hw_disable(struct ifcvf_hw *hw)
 {
u32 i;
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 769c603..6dd7925 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -179,4 +179,10 @@ struct ifcvf_hw {
 u64
 ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid);
 
+int
+ifcvf_hw_enable(struct ifcvf_hw *hw);
+
+void
+ifcvf_hw_disable(struct ifcvf_hw *hw);
+
 #endif /* _IFCVF_H_ */
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ef5b36c..14bc5c8 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -332,10 +332,67 @@ struct rte_vdpa_dev_info {
 
rte_vhost_get_negotiated_features(vid, &features);
if (RTE_VHOST_NEED_LOG(features)) {
-   ifcvf_disable_logging(hw);
-   rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
-   rte_vfio_container_dma_unmap(internal->vfio_container_fd,
-   log_base, IFCVF_LOG_BASE, log_size);
+   if (internal->device_type == IFCVF_NET) {
+   ifcvf_disable_logging(hw);
+   rte_vhost_get_log_base(internal->vid, &log_base,
+   &log_size);
+   rte_vfio_container_dma_unmap(
+   internal->vfio_container_fd, log_base,
+   IFCVF_LOG_BASE, log_size);
+   }
+   /* IFCVF marks dirty memory pages for only packet buffer,
+* SW helps to mark the used ring as dirty after device stops.
+*/
+   for (i = 0; i < hw->nr_vring; i++) {
+   len = IFCVF_USED_RING_LEN(hw->vring[i].size);
+   rte_vhost_log_used_vring(vid, i, 0, len);
+   }
+   }
+}
+
+static void
+vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
+{
+   struct ifcvf_hw *hw = &internal->hw;
+   struct rte_vhost_vring vq;
+   int i, vid;
+   uint64_t features = 0;
+   uint64_t log_base = 0, log_size = 0;
+   uint64_t len;
+
+   vid = internal->vid;
+
+   if (internal->device_type == IFCVF_BLK) {
+   for (i = 0; i < hw->nr_vring; i++) {
+   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
+   while (vq.avail->idx != vq.used->idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   hw->vring[i].last_avail_idx = vq.avail->idx;
+   hw->vring[i].last_used_idx = vq.used->idx;
+   }
+   }
+
+   ifcvf_hw_disable(hw);
+
+   for (i = 0; i < hw->nr_vring; i++)
+   rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
+   hw->vring[i].last_used_idx);
+
+   if (internal->sw_lm)
+   return;
+
+   rte_vhost_get_negotiated_features(vid, &features);
+   if (RTE_VHOST_NEED_LOG(features)) {
+   if (internal->device_type == IFCVF_NET) {
+   ifcvf_disable_logging(hw);
+   rte_vhost_get_log_base(internal->vid, &log_base,
+   &log_size);
+   rte_vfio_container_dma_unmap(
+   internal->vfio_container_fd, log_base,
+   IFCVF_LOG_BASE, log_size);
+   }
/*
 * IFCVF marks dirty memory pages for only packet buffer,
 * SW helps to mark the used ring as dirty after device stops.
@@ -661,15 +718,17 @@ struct rte_vdpa_dev_info {
}
hw->vring[i].avail = gpa;
 
-   /* Direct I/O for Tx queue, relay for Rx queue */
-   if (i & 1) {
+   /* NETWORK: Direct I/O for Tx queue, relay for Rx queue
+* BLK: relay every queue
+*/
+   if ((i & 1) && (int

[PATCH v2 06/15] example/vdpa:add vdpa blk support in example

2022-01-25 Thread Andy Pei
Add virtio blk device support to the vdpa example.

Signed-off-by: Andy Pei 
---
 examples/vdpa/Makefile   |   2 +-
 examples/vdpa/main.c |   8 ++
 examples/vdpa/meson.build|   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++
 examples/vdpa/vdpa_blk_compact.h | 117 
 examples/vdpa/vhost_user.h   | 189 +++
 6 files changed, 466 insertions(+), 1 deletion(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
index d974db4..9d0479b 100644
--- a/examples/vdpa/Makefile
+++ b/examples/vdpa/Makefile
@@ -5,7 +5,7 @@
 APP = vdpa
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c vdpa_blk_compact.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 PKGCONF ?= pkg-config
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
index 5ab0765..924ad7b 100644
--- a/examples/vdpa/main.c
+++ b/examples/vdpa/main.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include "vdpa_blk_compact.h"
 
 #define MAX_PATH_LEN 128
 #define MAX_VDPA_SAMPLE_PORTS 1024
@@ -156,6 +157,7 @@ struct vdpa_port {
 static const struct rte_vhost_device_ops vdpa_sample_devops = {
.new_device = new_device,
.destroy_device = destroy_device,
+   .new_connection = rte_vhost_blk_session_install_rte_compat_hooks,
 };
 
 static int
@@ -192,6 +194,12 @@ struct vdpa_port {
"attach vdpa device failed: %s\n",
socket_path);
 
+   if (vdpa_blk_device_set_features_and_protocol(socket_path, vport->dev)
+   < 0)
+   rte_exit(EXIT_FAILURE,
+   "set vhost blk driver features and protocol features 
failed: %s\n",
+   socket_path);
+
if (rte_vhost_driver_start(socket_path) < 0)
rte_exit(EXIT_FAILURE,
"start vhost driver failed: %s\n",
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
index bd08605..f0d111c 100644
--- a/examples/vdpa/meson.build
+++ b/examples/vdpa/meson.build
@@ -15,4 +15,5 @@ deps += 'vhost'
 allow_experimental_apis = true
 sources = files(
 'main.c',
+   'vdpa_blk_compact.c',
 )
diff --git a/examples/vdpa/vdpa_blk_compact.c b/examples/vdpa/vdpa_blk_compact.c
new file mode 100644
index 000..0c4d3ee
--- /dev/null
+++ b/examples/vdpa/vdpa_blk_compact.c
@@ -0,0 +1,150 @@
+/*INTEL CONFIDENTIAL
+ *
+ *Copyright (c) Intel Corporation.
+ *All rights reserved.
+ *
+ *The source code contained or described herein and all documents related
+ *to the source code ("Material") are owned by Intel Corporation or its
+ *suppliers or licensors.  Title to the Material remains with Intel
+ *Corporation or its suppliers and licensors.  The Material contains trade
+ *secrets and proprietary and confidential information of Intel or its
+ *suppliers and licensors.  The Material is protected by worldwide
+ *copyright and trade secret laws and treaty provisions.  No part of the
+ *Material may be used, copied, reproduced, modified, published, uploaded,
+ *posted, transmitted, distributed, or disclosed in any way without Intel's
+ *prior express written permission.
+ *
+ *No license under any patent, copyright, trade secret or other
+ *intellectual property right is granted to or conferred upon you by
+ *disclosure or delivery of the Materials, either expressly, by
+ *implication, inducement, estoppel or otherwise.  Any license under such
+ *intellectual property rights must be express and approved by Intel in
+ *writing.
+ */
+
+/* @file
+ *
+ * Block device specific vhost lib
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include "vdpa_blk_compact.h"
+#include "vhost_user.h"
+
+#define VHOST_USER_GET_CONFIG  24
+#define VHOST_USER_SET_CONFIG  25
+
+#ifndef VHOST_USER_PROTOCOL_F_CONFIG
+#define VHOST_USER_PROTOCOL_F_CONFIG   9
+#endif
+
+/*
+ * Function to handle vhost user blk message
+ */
+static enum rte_vhost_msg_result
+rte_vhost_blk_extern_vhost_pre_msg_handler(int vid, void *_msg)
+{
+   struct VhostUserMsg *msg = _msg;
+   struct rte_vdpa_device *vdev = NULL;
+
+   vdev = rte_vhost_get_vdpa_device(vid);
+   if (vdev == NULL)
+   return RTE_VHOST_MSG_RESULT_ERR;
+
+   fprintf(stderr, "msg is %d\n", msg->request.master);
+   switch (msg->request.master) {
+   case VHOST_USER_GET_CONFIG: {
+   int rc = 0;
+
+   fprintf(stdout, "read message VHOST_USER_GET_CONFIG\n");
+
+   if (vdev->ops->get_config) {
+   fprintf(stdout, "get_config() fun

[PATCH v2 07/15] usertools: add support for virtio blk device

2022-01-25 Thread Andy Pei
Add virtio blk device support to devbind.

Signed-off-by: Andy Pei 
---
 usertools/dpdk-devbind.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index ace4627..cbe336f 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -14,6 +14,8 @@
 from os.path import join as path_join
 
 # The PCI base class for all devices
+virtio_blk_class = {'Class': '01', 'Vendor': "1af4", 'Device': '1001',
+'SVendor': '8086', 'SDevice': '0002'}
 network_class = {'Class': '02', 'Vendor': None, 'Device': None,
  'SVendor': None, 'SDevice': None}
 acceleration_class = {'Class': '12', 'Vendor': None, 'Device': None,
@@ -72,6 +74,7 @@
 cn9k_ree = {'Class': '08', 'Vendor': '177d', 'Device': 'a0f4',
  'SVendor': None, 'SDevice': None}
 
+virtio_blk_devices = [virtio_blk_class]
 network_devices = [network_class, cavium_pkx, avp_vnic, ifpga_class]
 baseband_devices = [acceleration_class]
 crypto_devices = [encryption_class, intel_processor_class]
@@ -587,6 +590,9 @@ def show_status():
 Displays to the user what devices are bound to the igb_uio driver, the
 kernel driver or to no driver'''
 
+if status_dev in ["virtio_blk", "all"]:
+show_device_status(virtio_blk_devices, "virtio_blk")
+
 if status_dev in ["net", "all"]:
 show_device_status(network_devices, "Network", if_field=True)
 
@@ -746,6 +752,7 @@ def do_arg_actions():
 if b_flag is not None:
 clear_data()
 # refresh if we have changed anything
+get_device_details(virtio_blk_devices)
 get_device_details(network_devices)
 get_device_details(baseband_devices)
 get_device_details(crypto_devices)
@@ -769,6 +776,7 @@ def main():
 parse_args()
 check_modules()
 clear_data()
+get_device_details(virtio_blk_devices)
 get_device_details(network_devices)
 get_device_details(baseband_devices)
 get_device_details(crypto_devices)
-- 
1.8.3.1



[PATCH v2 08/15] vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk device

2022-01-25 Thread Andy Pei
The set_vring_state op is mandatory, so add a set_vring_state op for the blk device.
For now the blk set_vring_state op is a stub: it ignores its arguments and returns 0.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 14bc5c8..00e7274 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1373,6 +1373,16 @@ struct rte_vdpa_dev_info {
 }
 
 static int
+ifcvf_blk_set_vring_state(int vid, int vring, int state)
+{
+   RTE_SET_USED(vid);
+   RTE_SET_USED(vring);
+   RTE_SET_USED(state);
+
+   return 0;
+}
+
+static int
 ifcvf_blk_get_protocol_features(struct rte_vdpa_device *vdev,
uint64_t *features)
 {
@@ -1390,7 +1400,7 @@ struct rte_vdpa_dev_info {
.get_protocol_features = ifcvf_blk_get_protocol_features,
.dev_conf = ifcvf_dev_config,
.dev_close = ifcvf_dev_close,
-   .set_vring_state = NULL,
+   .set_vring_state = ifcvf_blk_set_vring_state,
.migration_done = NULL,
.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
-- 
1.8.3.1



[PATCH v2 09/15] vdpa/ifc: add some log at VDPA launch before qemu connect

2022-01-25 Thread Andy Pei
Add some log of virtio blk device config space information
at VDPA launch before qemu connects.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 00e7274..ff91e80 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1436,6 +1436,9 @@ struct rte_vdpa_dev_info dev_info[] = {
struct rte_kvargs *kvlist = NULL;
int ret = 0;
int16_t device_id;
+   __u64 capacity = 0;
+   uint8_t *byte;
+   uint32_t i;
 
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
@@ -1502,6 +1505,31 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
+
+   /* cannot read 64-bit register in one attempt,
+* so read byte by byte.
+*/
+   for (i = 0; i < sizeof(internal->hw.blk_cfg->capacity); i++) {
+   byte = (uint8_t *)&internal->hw.blk_cfg->capacity + i;
+   capacity |= (__u64)*byte << (i * 8);
+   }
+   DRV_LOG(INFO, "capacity  : %quG", capacity >> 21);
+
+   DRV_LOG(INFO, "size_max  : 0x%08x",
+   internal->hw.blk_cfg->size_max);
+   DRV_LOG(INFO, "seg_max   : 0x%08x",
+   internal->hw.blk_cfg->seg_max);
+   DRV_LOG(INFO, "blk_size  : 0x%08x",
+   internal->hw.blk_cfg->blk_size);
+   DRV_LOG(INFO, "geometry");
+   DRV_LOG(INFO, "cylinders: %u",
+   internal->hw.blk_cfg->geometry.cylinders);
+   DRV_LOG(INFO, "heads: %u",
+   internal->hw.blk_cfg->geometry.heads);
+   DRV_LOG(INFO, "sectors  : %u",
+   internal->hw.blk_cfg->geometry.sectors);
+   DRV_LOG(INFO, "num_queues: 0x%08x",
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v2 10/15] vdpa/ifc: read virtio max_queues from hardware

2022-01-25 Thread Andy Pei
In the original code, max_queues is set to IFCVF_MAX_QUEUES.
In the new code, max_queues is the minimum of IFCVF_MAX_QUEUES and the hardware num_queues.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index ff91e80..d30c3fd 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1530,6 +1530,10 @@ struct rte_vdpa_dev_info dev_info[] = {
internal->hw.blk_cfg->geometry.sectors);
DRV_LOG(INFO, "num_queues: 0x%08x",
internal->hw.blk_cfg->num_queues);
+
+   /* reset max_queue here, to minimum modification */
+   internal->max_queues = RTE_MIN(IFCVF_MAX_QUEUES,
+   internal->hw.blk_cfg->num_queues);
}
 
list->internal = internal;
-- 
1.8.3.1



[PATCH v2 11/15] vdpa: add config space change interrupt register and handle for virtio_blk

2022-01-25 Thread Andy Pei
Create a thread to poll and relay the config space change interrupt.
Use VHOST_USER_SLAVE_CONFIG_CHANGE_MSG to inform QEMU.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/ifcvf_vdpa.c | 111 ++
 1 file changed, 111 insertions(+)

diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index d30c3fd..981cb26 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -53,7 +53,9 @@ struct ifcvf_internal {
int vfio_group_fd;
int vfio_dev_fd;
pthread_t tid;  /* thread for notify relay */
+   pthread_t intr_tid; /* thread for intr relay */
int epfd;
+   int csc_fd;
int vid;
struct rte_vdpa_device *vdev;
uint16_t max_queues;
@@ -617,6 +619,106 @@ struct rte_vdpa_dev_info {
return 0;
 }
 
+static void
+virtio_interrupt_handler(struct ifcvf_internal *internal)
+{
+   int vid = internal->vid;
+   int ret;
+
+   ret = rte_vhost_slave_config_change(vid, 1);
+   if (ret)
+   DRV_LOG(ERR, "failed to notify the guest about configuration 
space change.");
+}
+
+static void *
+intr_relay(void *arg)
+{
+   struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
+   struct epoll_event csc_event;
+   struct epoll_event ev;
+   uint64_t buf;
+   int nbytes;
+   int csc_fd, csc_val = 0;
+
+   csc_fd = epoll_create(1);
+   if (csc_fd < 0) {
+   DRV_LOG(ERR, "failed to create epoll for config space change.");
+   return NULL;
+   }
+
+   ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
+   ev.data.fd = rte_intr_fd_get(internal->pdev->intr_handle);
+   if (epoll_ctl(csc_fd, EPOLL_CTL_ADD,
+   rte_intr_fd_get(internal->pdev->intr_handle), &ev) < 0) {
+   DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
+   return NULL;
+   }
+
+   internal->csc_fd = csc_fd;
+
+   for (;;) {
+   csc_val = epoll_wait(csc_fd, &csc_event, 1, -1);
+   if (csc_val < 0) {
+   if (errno == EINTR)
+   continue;
+   DRV_LOG(ERR, "epoll_wait return fail\n");
+   return NULL;
+   } else if (csc_val == 0) {
+   continue;
+   } else {
+   /* csc_val > 0 */
+   nbytes = read(csc_event.data.fd, &buf, 8);
+   if (nbytes < 0) {
+   if (errno == EINTR || errno == EWOULDBLOCK)
+   continue;
+   DRV_LOG(ERR, "Error reading from file 
descriptor %d: %s\n",
+   csc_event.data.fd,
+   strerror(errno));
+   return NULL;
+   } else if (nbytes == 0) {
+   DRV_LOG(ERR, "Read nothing from file descriptor 
%d\n",
+   csc_event.data.fd);
+   continue;
+   } else {
+   virtio_interrupt_handler(internal);
+   }
+   }
+   }
+   return NULL;
+}
+
+static int
+setup_intr_relay(struct ifcvf_internal *internal)
+{
+   int ret;
+
+   ret = pthread_create(&internal->intr_tid, NULL, intr_relay,
+   (void *)internal);
+   if (ret) {
+   DRV_LOG(ERR, "failed to create notify relay pthread.");
+   return -1;
+   }
+   return 0;
+}
+
+static int
+unset_intr_relay(struct ifcvf_internal *internal)
+{
+   void *status;
+
+   if (internal->intr_tid) {
+   pthread_cancel(internal->intr_tid);
+   pthread_join(internal->intr_tid, &status);
+   }
+   internal->intr_tid = 0;
+
+   if (internal->csc_fd >= 0)
+   close(internal->csc_fd);
+   internal->csc_fd = -1;
+
+   return 0;
+}
+
 static int
 update_datapath(struct ifcvf_internal *internal)
 {
@@ -643,10 +745,16 @@ struct rte_vdpa_dev_info {
if (ret)
goto err;
 
+   ret = setup_intr_relay(internal);
+   if (ret)
+   goto err;
+
rte_atomic32_set(&internal->running, 1);
} else if (rte_atomic32_read(&internal->running) &&
   (!rte_atomic32_read(&internal->started) ||
!rte_atomic32_read(&internal->dev_attached))) {
+   ret = unset_intr_relay(internal);
+
ret = unset_notify_relay(internal);
if (ret)
goto err;
@@ 

[PATCH v2 12/15] vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe

2022-01-25 Thread Andy Pei
Add is_blk flag to ifcvf_hw, and init is_blk during probe.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.h | 1 +
 drivers/vdpa/ifc/ifcvf_vdpa.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 6dd7925..8e602af 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -149,6 +149,7 @@ struct ifcvf_hw {
u8 *lm_cfg;
struct vring_info vring[IFCVF_MAX_QUEUES * 2];
u8 nr_vring;
+   u8 is_blk;
struct ifcvf_pci_mem_resource mem_resource[IFCVF_PCI_MAX_RESOURCE];
 };
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 981cb26..4eb8f98 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -1608,11 +1608,13 @@ struct rte_vdpa_dev_info dev_info[] = {
 
if (device_id == VIRTIO_ID_NET) {
internal->device_type = IFCVF_NET;
+   internal->hw.is_blk = IFCVF_NET;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_NET].features;
} else if (device_id == VIRTIO_ID_BLOCK) {
internal->device_type = IFCVF_BLK;
+   internal->hw.is_blk = IFCVF_BLK;
internal->features = features &
~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
internal->features |= dev_info[IFCVF_BLK].features;
-- 
1.8.3.1



[PATCH v2 13/15] vdpa/ifc/base: for blk device, live migration register is different from net device

2022-01-25 Thread Andy Pei
1. last_avail_idx is the lower 16 bits of the register.
2. The address of the ring_state register differs between net and blk devices.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c | 36 +---
 drivers/vdpa/ifc/base/ifcvf.h |  1 +
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index 3a69e53..a8a4728 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -216,10 +216,18 @@
&cfg->queue_used_hi);
IFCVF_WRITE_REG16(hw->vring[i].size, &cfg->queue_size);
 
-   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4) =
-   (u32)hw->vring[i].last_avail_idx |
-   ((u32)hw->vring[i].last_used_idx << 16);
+   if (hw->is_blk == IFCVF_BLK) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   } else if (hw->is_blk == IFCVF_NET) {
+   *(u32 *)(lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4) =
+   (u32)hw->vring[i].last_avail_idx |
+   ((u32)hw->vring[i].last_used_idx << 16);
+   }
 
IFCVF_WRITE_REG16(i + 1, &cfg->queue_msix_vector);
if (IFCVF_READ_REG16(&cfg->queue_msix_vector) ==
@@ -252,9 +260,23 @@
IFCVF_WRITE_REG16(i, &cfg->queue_select);
IFCVF_WRITE_REG16(0, &cfg->queue_enable);
IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
-   ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
-   (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
+
+   if (hw->is_blk) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   } else if (hw->is_blk == IFCVF_NET) {
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   (i / 2) * IFCVF_LM_CFG_SIZE +
+   (i % 2) * 4);
+   }
+
+   if (hw->is_blk == IFCVF_BLK)
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   else if (hw->is_blk == IFCVF_NET)
+   hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
}
 }
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 8e602af..7367094 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -65,6 +65,7 @@
 #define IFCVF_MEDIATED_VRING   0x2000
 
 #define IFCVF_32_BIT_MASK  0x
+#define IFCVF_16_BIT_MASK  0x
 
 #ifndef VHOST_USER_PROTOCOL_F_CONFIG
 #define VHOST_USER_PROTOCOL_F_CONFIG   9
-- 
1.8.3.1



[PATCH v2 14/15] vdpa/ifc: make sure hardware last_avail_idx and last_used_idx is the same when blk device pause

2022-01-25 Thread Andy Pei
When the virtio blk device is paused, make sure the hardware last_avail_idx and
last_used_idx are the same.

Signed-off-by: Andy Pei 
---
 drivers/vdpa/ifc/base/ifcvf.c |  2 +-
 drivers/vdpa/ifc/base/ifcvf.h |  3 +++
 drivers/vdpa/ifc/ifcvf_vdpa.c | 32 +++-
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/vdpa/ifc/base/ifcvf.c b/drivers/vdpa/ifc/base/ifcvf.c
index a8a4728..7018048 100644
--- a/drivers/vdpa/ifc/base/ifcvf.c
+++ b/drivers/vdpa/ifc/base/ifcvf.c
@@ -116,7 +116,7 @@
IFCVF_WRITE_REG8(status, &hw->common_cfg->device_status);
 }
 
-STATIC void
+void
 ifcvf_reset(struct ifcvf_hw *hw)
 {
ifcvf_set_status(hw, 0);
diff --git a/drivers/vdpa/ifc/base/ifcvf.h b/drivers/vdpa/ifc/base/ifcvf.h
index 7367094..f22d18b 100644
--- a/drivers/vdpa/ifc/base/ifcvf.h
+++ b/drivers/vdpa/ifc/base/ifcvf.h
@@ -157,6 +157,9 @@ struct ifcvf_hw {
 int
 ifcvf_init_hw(struct ifcvf_hw *hw, PCI_DEV *dev);
 
+void
+ifcvf_reset(struct ifcvf_hw *hw);
+
 u64
 ifcvf_get_features(struct ifcvf_hw *hw);
 
diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
index 4eb8f98..b0b2859 100644
--- a/drivers/vdpa/ifc/ifcvf_vdpa.c
+++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
@@ -356,23 +356,32 @@ struct rte_vdpa_dev_info {
 vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal)
 {
struct ifcvf_hw *hw = &internal->hw;
-   struct rte_vhost_vring vq;
int i, vid;
uint64_t features = 0;
uint64_t log_base = 0, log_size = 0;
uint64_t len;
+   u32 ring_state = 0;
 
vid = internal->vid;
 
if (internal->device_type == IFCVF_BLK) {
for (i = 0; i < hw->nr_vring; i++) {
-   rte_vhost_get_vhost_vring(internal->vid, i, &vq);
-   while (vq.avail->idx != vq.used->idx) {
-   ifcvf_notify_queue(hw, i);
-   usleep(10);
-   }
-   hw->vring[i].last_avail_idx = vq.avail->idx;
-   hw->vring[i].last_used_idx = vq.used->idx;
+   do {
+   if (hw->lm_cfg != NULL)
+   ring_state = *(u32 *)(hw->lm_cfg +
+   IFCVF_LM_RING_STATE_OFFSET +
+   i * IFCVF_LM_CFG_SIZE);
+   hw->vring[i].last_avail_idx =
+   (u16)(ring_state & IFCVF_16_BIT_MASK);
+   hw->vring[i].last_used_idx =
+   (u16)(ring_state >> 16);
+   if (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx) {
+   ifcvf_notify_queue(hw, i);
+   usleep(10);
+   }
+   } while (hw->vring[i].last_avail_idx !=
+   hw->vring[i].last_used_idx);
}
}
 
@@ -759,7 +768,12 @@ struct rte_vdpa_dev_info {
if (ret)
goto err;
 
-   vdpa_ifcvf_stop(internal);
+   if (internal->device_type == IFCVF_BLK) {
+   vdpa_ifcvf_blk_pause(internal);
+   ifcvf_reset(&internal->hw);
+   } else {
+   vdpa_ifcvf_stop(internal);
+   }
 
ret = vdpa_disable_vfio_intr(internal);
if (ret)
-- 
1.8.3.1



[PATCH v2 15/15] vhost: make sure each queue callfd is configured

2022-01-25 Thread Andy Pei
During the vhost data path building process, QEMU creates
a call fd at first, and creates another call fd at the end.
The final call fd is the one used to relay notifications.
In the original code, after the kick fd is set, dev_conf
configures the first call fd. Even though the actual call fd is set later,
the data path will not work correctly.

Signed-off-by: Andy Pei 
---
 lib/vhost/vhost_user.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 5eb1dd6..b25b25f 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -3137,12 +3137,26 @@ typedef int (*vhost_message_handler_t)(struct 
virtio_net **pdev,
if (!vdpa_dev)
goto out;
 
+   if (request != VHOST_USER_SET_VRING_CALL)
+   goto out;
+
if (!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED)) {
if (vdpa_dev->ops->dev_conf(dev->vid))
VHOST_LOG_CONFIG(ERR,
 "Failed to configure vDPA device\n");
else
dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+   } else {
+   /* when VIRTIO_DEV_VDPA_CONFIGURED already configured
+* close the device and config the device again,
+* make sure the call fd of each queue is configured correctly.
+*/
+   if (vdpa_dev->ops->dev_close(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to close vDPA device\n");
+   if (vdpa_dev->ops->dev_conf(dev->vid))
+   VHOST_LOG_CONFIG(ERR,
+"Failed to re-config vDPA device\n");
}
 
 out:
-- 
1.8.3.1



[PATCH v3 00/15] add virtio_blk device support to vdpa/ifc

2022-01-28 Thread Andy Pei
This patch set adds virtio_blk device support to the vdpa/ifc driver.
Because the two device types have a lot in common, part of the vdpa/ifc driver is re-used.
The virtio net and blk devices are distinguished by device ID, and
device-specific features and ops are implemented.
An example is added to vdpa to support the virtio_blk device.
To support blk device live migration, some modifications are made to the vhost lib.
The dev_conf op is performed only on the VHOST_USER_SET_VRING_CALL message.

v3:
 Fix some compile issues.
v2:
 Fix some coding style issues.

Andy Pei (15):
  vdpa/ifc: add support for virtio blk device
  vhost: add vdpa ops for blk device
  vdpa/ifc: add blk ops for ifc device
  vdpa/ifc: add vdpa interrupt for blk device
  vdpa/ifc: add blk dev sw live migration
  example/vdpa:add vdpa blk support in example
  usertools: add support for virtio blk device
  vdpa/ifc: set_vring_state op is mandatory, add set_vring_state for blk
device
  vdpa/ifc: add some log at VDPA launch before qemu connect
  vdpa/ifc: read virtio max_queues from hardware
  vdpa: add config space change interrupt register and handle for
virtio_blk
  vdpa/ifc: add is_blk flag to ifcvf_hw, and init is_blk during probe
  vdpa/ifc/base: for blk device, live migration register is different
from net device
  vdpa/ifc: make sure hardware last_avail_idx and last_used_idx are the
same when blk device pause
  vhost: make sure each queue callfd is configured

 drivers/vdpa/ifc/base/ifcvf.c|  42 +++-
 drivers/vdpa/ifc/base/ifcvf.h|  29 ++-
 drivers/vdpa/ifc/ifcvf_vdpa.c| 520 ---
 examples/vdpa/Makefile   |   2 +-
 examples/vdpa/main.c |   8 +
 examples/vdpa/meson.build|   1 +
 examples/vdpa/vdpa_blk_compact.c | 150 +++
 examples/vdpa/vdpa_blk_compact.h | 117 +
 examples/vdpa/vhost_user.h   | 189 ++
 lib/vhost/vdpa_driver.h  |   8 +-
 lib/vhost/vhost_user.c   |  14 ++
 usertools/dpdk-devbind.py|   8 +
 12 files changed, 1034 insertions(+), 54 deletions(-)
 create mode 100644 examples/vdpa/vdpa_blk_compact.c
 create mode 100644 examples/vdpa/vdpa_blk_compact.h
 create mode 100644 examples/vdpa/vhost_user.h

-- 
1.8.3.1



  1   2   3   4   5   6   >