Re: [PATCH net-next v9] virtio_net: Support RX hash XDP hint

2024-04-17 Thread Heng Qi




在 2024/4/17 下午3:18, Liang Chen 写道:

The RSS hash report is a feature that's part of the virtio specification.
Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
(still a work in progress as per [1]) support this feature. While the
capability to obtain the RSS hash has been enabled in the normal path,
it's currently missing in the XDP path. Therefore, we are introducing
XDP hints through kfuncs to allow XDP programs to access the RSS hash.

1.
https://lore.kernel.org/all/20231015141644.260646-1-akihiko.od...@daynix.com/#r

Signed-off-by: Liang Chen 


Reviewed-by: Heng Qi 

Thanks.


---
   Changes from v8:
- move max table macro out of uAPI
   Changes from v7:
- use table lookup for rss hash type
   Changes from v6:
- fix a coding style issue
   Changes from v5:
- Preservation of the hash value has been dropped, following the conclusion
   from discussions in V3 reviews. The virtio_net driver doesn't
   accessing/using the virtio_net_hdr after the XDP program execution, so
   nothing tragic should happen. As to the xdp program, if it smashes the
   entry in virtio header, it is likely buggy anyways. Additionally, looking
   up the Intel IGC driver,  it also does not bother with this particular
   aspect.
---
  drivers/net/virtio_net.c | 43 
  1 file changed, 43 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..eb99bf6c555e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4621,6 +4621,48 @@ static void virtnet_set_big_packets(struct virtnet_info 
*vi, const int mtu)
}
  }
  
+#define VIRTIO_NET_HASH_REPORT_MAX_TABLE  10

+static enum xdp_rss_hash_type
+virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
+   [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
+   [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
+   [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
+   [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
+   [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
+   [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
+   [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
+   [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
+   [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
+   [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
+};
+
+static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+  enum xdp_rss_hash_type *rss_type)
+{
+   const struct xdp_buff *xdp = (void *)_ctx;
+   struct virtio_net_hdr_v1_hash *hdr_hash;
+   struct virtnet_info *vi;
+   u16 hash_report;
+
+   if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
+   return -ENODATA;
+
+   vi = netdev_priv(xdp->rxq->dev);
+   hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
+   hash_report = __le16_to_cpu(hdr_hash->hash_report);
+
+   if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
+   hash_report = VIRTIO_NET_HASH_REPORT_NONE;
+
+   *rss_type = virtnet_xdp_rss_type[hash_report];
+   *hash = __le32_to_cpu(hdr_hash->hash_value);
+   return 0;
+}
+
+static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
+   .xmo_rx_hash= virtnet_xdp_rx_hash,
+};
+
  static int virtnet_probe(struct virtio_device *vdev)
  {
int i, err = -ENOMEM;
@@ -4747,6 +4789,7 @@ static int virtnet_probe(struct virtio_device *vdev)
  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
  
  		dev->hw_features |= NETIF_F_RXHASH;

+   dev->xdp_metadata_ops = _xdp_metadata_ops;
}
  
  	if (vi->has_rss_hash_report)





Re: [PATCH net-next v8] virtio_net: Support RX hash XDP hint

2024-04-16 Thread Heng Qi




在 2024/4/16 下午2:19, Liang Chen 写道:

The RSS hash report is a feature that's part of the virtio specification.
Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
(still a work in progress as per [1]) support this feature. While the
capability to obtain the RSS hash has been enabled in the normal path,
it's currently missing in the XDP path. Therefore, we are introducing
XDP hints through kfuncs to allow XDP programs to access the RSS hash.

1.
https://lore.kernel.org/all/20231015141644.260646-1-akihiko.od...@daynix.com/#r

Signed-off-by: Liang Chen 
---
   Changes from v7:
- use table lookup for rss hash type
   Changes from v6:
- fix a coding style issue
   Changes from v5:
- Preservation of the hash value has been dropped, following the conclusion
   from discussions in V3 reviews. The virtio_net driver doesn't
   accessing/using the virtio_net_hdr after the XDP program execution, so
   nothing tragic should happen. As to the xdp program, if it smashes the
   entry in virtio header, it is likely buggy anyways. Additionally, looking
   up the Intel IGC driver,  it also does not bother with this particular
   aspect.
---
  drivers/net/virtio_net.c| 42 +
  include/uapi/linux/virtio_net.h |  1 +
  2 files changed, 43 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..1d750009f615 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4621,6 +4621,47 @@ static void virtnet_set_big_packets(struct virtnet_info 
*vi, const int mtu)
}
  }
  
+static enum xdp_rss_hash_type

+virtnet_xdp_rss_type[VIRTIO_NET_HASH_REPORT_MAX_TABLE] = {
+   [VIRTIO_NET_HASH_REPORT_NONE] = XDP_RSS_TYPE_NONE,
+   [VIRTIO_NET_HASH_REPORT_IPv4] = XDP_RSS_TYPE_L3_IPV4,
+   [VIRTIO_NET_HASH_REPORT_TCPv4] = XDP_RSS_TYPE_L4_IPV4_TCP,
+   [VIRTIO_NET_HASH_REPORT_UDPv4] = XDP_RSS_TYPE_L4_IPV4_UDP,
+   [VIRTIO_NET_HASH_REPORT_IPv6] = XDP_RSS_TYPE_L3_IPV6,
+   [VIRTIO_NET_HASH_REPORT_TCPv6] = XDP_RSS_TYPE_L4_IPV6_TCP,
+   [VIRTIO_NET_HASH_REPORT_UDPv6] = XDP_RSS_TYPE_L4_IPV6_UDP,
+   [VIRTIO_NET_HASH_REPORT_IPv6_EX] = XDP_RSS_TYPE_L3_IPV6_EX,
+   [VIRTIO_NET_HASH_REPORT_TCPv6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
+   [VIRTIO_NET_HASH_REPORT_UDPv6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX
+};
+
+static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+  enum xdp_rss_hash_type *rss_type)
+{
+   const struct xdp_buff *xdp = (void *)_ctx;
+   struct virtio_net_hdr_v1_hash *hdr_hash;
+   struct virtnet_info *vi;
+   u16 hash_report;
+
+   if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))
+   return -ENODATA;
+
+   vi = netdev_priv(xdp->rxq->dev);
+   hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);
+   hash_report = __le16_to_cpu(hdr_hash->hash_report);
+
+   if (hash_report >= VIRTIO_NET_HASH_REPORT_MAX_TABLE)
+   hash_report = VIRTIO_NET_HASH_REPORT_NONE;


When this happens, it may mean an error or user modification of the 
header occurred.

Should the following *hash* value be cleared to 0?

Thanks,
Heng


+
+   *rss_type = virtnet_xdp_rss_type[hash_report];
+   *hash = __le32_to_cpu(hdr_hash->hash_value);
+   return 0;
+}
+
+static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
+   .xmo_rx_hash= virtnet_xdp_rx_hash,
+};
+
  static int virtnet_probe(struct virtio_device *vdev)
  {
int i, err = -ENOMEM;
@@ -4747,6 +4788,7 @@ static int virtnet_probe(struct virtio_device *vdev)
  VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
  
  		dev->hw_features |= NETIF_F_RXHASH;

+   dev->xdp_metadata_ops = _xdp_metadata_ops;
}
  
  	if (vi->has_rss_hash_report)

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index cc65ef0f3c3e..3ee695450096 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -176,6 +176,7 @@ struct virtio_net_hdr_v1_hash {
  #define VIRTIO_NET_HASH_REPORT_IPv6_EX 7
  #define VIRTIO_NET_HASH_REPORT_TCPv6_EX8
  #define VIRTIO_NET_HASH_REPORT_UDPv6_EX9
+#define VIRTIO_NET_HASH_REPORT_MAX_TABLE  10
__le16 hash_report;
__le16 padding;
  };





Re: [PATCH v4] vp_vdpa: don't allocate unused msix vectors

2024-04-09 Thread Heng Qi




在 2024/4/9 下午4:58, lyx634449800 写道:

From: Yuxue Liu 

When there is a ctlq and it doesn't require interrupt
callbacks,the original method of calculating vectors
wastes hardware msi or msix resources as well as system
IRQ resources.

When conducting performance testing using testpmd in the
guest os, it was found that the performance was lower compared
to directly using vfio-pci to passthrough the device

In scenarios where the virtio device in the guest os does
not utilize interrupts, the vdpa driver still configures
the hardware's msix vector. Therefore, the hardware still
sends interrupts to the host os. Because of this unnecessary
action by the hardware, hardware performance decreases, and
it also affects the performance of the host os.

Before modification:(interrupt mode)
  32:  0   0  0  0 PCI-MSI 32768-edgevp-vdpa[:00:02.0]-0
  33:  0   0  0  0 PCI-MSI 32769-edgevp-vdpa[:00:02.0]-1
  34:  0   0  0  0 PCI-MSI 32770-edgevp-vdpa[:00:02.0]-2
  35:  0   0  0  0 PCI-MSI 32771-edgevp-vdpa[:00:02.0]-config

After modification:(interrupt mode)
  32:  0  0  1  7   PCI-MSI 32768-edge  vp-vdpa[:00:02.0]-0
  33: 36  0  3  0   PCI-MSI 32769-edge  vp-vdpa[:00:02.0]-1
  34:  0  0  0  0   PCI-MSI 32770-edge  vp-vdpa[:00:02.0]-config

Before modification:(virtio pmd mode for guest os)
  32:  0   0  0  0 PCI-MSI 32768-edgevp-vdpa[:00:02.0]-0
  33:  0   0  0  0 PCI-MSI 32769-edgevp-vdpa[:00:02.0]-1
  34:  0   0  0  0 PCI-MSI 32770-edgevp-vdpa[:00:02.0]-2
  35:  0   0  0  0 PCI-MSI 32771-edgevp-vdpa[:00:02.0]-config

After modification:(virtio pmd mode for guest os)
  32: 0  0  0   0   PCI-MSI 32768-edge   vp-vdpa[:00:02.0]-config

To verify the use of the virtio PMD mode in the guest operating
system, the following patch needs to be applied to QEMU:
https://lore.kernel.org/all/20240408073311.2049-1-yuxue@jaguarmicro.com

Signed-off-by: Yuxue Liu 
Acked-by: Jason Wang 
---
V4: Update the title and assign values to uninitialized variables
V3: delete unused variables and add validation records
V2: fix when allocating IRQs, scan all queues

  drivers/vdpa/virtio_pci/vp_vdpa.c | 23 +--
  1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c 
b/drivers/vdpa/virtio_pci/vp_vdpa.c
index df5f4a3bccb5..74bc8adfc7e8 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -160,7 +160,14 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
struct pci_dev *pdev = mdev->pci_dev;
int i, ret, irq;
int queues = vp_vdpa->queues;
-   int vectors = queues + 1;
+   int vectors = 0;
+   int msix_vec = 0;
+
+   for (i = 0; i < queues; i++) {
+   if (vp_vdpa->vring[i].cb.callback)
+   vectors++;
+   }
+   vectors++;
  
  	ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);

if (ret != vectors) {
@@ -173,9 +180,12 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
vp_vdpa->vectors = vectors;
  
  	for (i = 0; i < queues; i++) {

+   if (!vp_vdpa->vring[i].cb.callback)
+   continue;
+
snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE,
"vp-vdpa[%s]-%d\n", pci_name(pdev), i);
-   irq = pci_irq_vector(pdev, i);
+   irq = pci_irq_vector(pdev, msix_vec);
ret = devm_request_irq(>dev, irq,
   vp_vdpa_vq_handler,
   0, vp_vdpa->vring[i].msix_name,
@@ -185,21 +195,22 @@ static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
"vp_vdpa: fail to request irq for vq %d\n", i);
goto err;
}
-   vp_modern_queue_vector(mdev, i, i);
+   vp_modern_queue_vector(mdev, i, msix_vec);
vp_vdpa->vring[i].irq = irq;
+   msix_vec++;
}
  
  	snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n",

 pci_name(pdev));
-   irq = pci_irq_vector(pdev, queues);
+   irq = pci_irq_vector(pdev, msix_vec);
ret = devm_request_irq(>dev, irq,  vp_vdpa_config_handler, 0,
   vp_vdpa->msix_name, vp_vdpa);
if (ret) {
dev_err(>dev,
-   "vp_vdpa: fail to request irq for vq %d\n", i);
+   "vp_vdpa: fail to request irq for config, ret %d\n", 
ret);
goto err;
}
-   vp_modern_config_vector(mdev, queues);
+   vp_modern_config_vector(mdev, msix_vec);
vp_vdpa->config_irq = irq;
  
  	return 0;


Reviewed-by: Heng Qi 



Re: [PATCH net v3] virtio_net: Do not send RSS key if it is not supported

2024-04-03 Thread Heng Qi




在 2024/4/1 上午4:20, Michael S. Tsirkin 写道:

On Fri, Mar 29, 2024 at 10:16:41AM -0700, Breno Leitao wrote:

There is a bug when setting the RSS options in virtio_net that can break
the whole machine, getting the kernel into an infinite loop.

Running the following command in any QEMU virtual machine with virtionet
will reproduce this problem:

 # ethtool -X eth0  hfunc toeplitz

This is how the problem happens:

1) ethtool_set_rxfh() calls virtnet_set_rxfh()

2) virtnet_set_rxfh() calls virtnet_commit_rss_command()

3) virtnet_commit_rss_command() populates 4 entries for the rss
scatter-gather

4) Since the command above does not have a key, then the last
scatter-gatter entry will be zeroed, since rss_key_size == 0.
sg_buf_size = vi->rss_key_size;

5) This buffer is passed to qemu, but qemu is not happy with a buffer
with zero length, and do the following in virtqueue_map_desc() (QEMU
function):

   if (!sz) {
   virtio_error(vdev, "virtio: zero sized buffers are not allowed");

6) virtio_error() (also QEMU function) set the device as broken

 vdev->broken = true;

7) Qemu bails out, and do not repond this crazy kernel.

8) The kernel is waiting for the response to come back (function
virtnet_send_command())

9) The kernel is waiting doing the following :

   while (!virtqueue_get_buf(vi->cvq, ) &&
 !virtqueue_is_broken(vi->cvq))
  cpu_relax();

10) None of the following functions above is true, thus, the kernel
loops here forever. Keeping in mind that virtqueue_is_broken() does
not look at the qemu `vdev->broken`, so, it never realizes that the
vitio is broken at QEMU side.

Fix it by not sending RSS commands if the feature is not available in
the device.

Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.")
Cc: sta...@vger.kernel.org

net has its own stable process, don't CC stable on net patches.



Cc: qemu-de...@nongnu.org
Signed-off-by: Breno Leitao 
---
Changelog:

V2:
   * Moved from creating a valid packet, by rejecting the request
 completely
V3:
   * Got some good feedback from and Xuan Zhuo and Heng Qi, and reworked
 the rejection path.

---
  drivers/net/virtio_net.c | 22 ++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..c4a21ec51adf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
  {
struct virtnet_info *vi = netdev_priv(dev);
+   bool update = false;
int i;
  
  	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&

@@ -3814,13 +3815,24 @@ static int virtnet_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
  
  	if (rxfh->indir) {

+   if (!vi->has_rss)
+   return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+   update = true;
}
-   if (rxfh->key)
+
+   if (rxfh->key) {
+   if (!vi->has_rss && !vi->has_rss_hash_report)
+   return -EOPNOTSUPP;


What's the logic here? Is it || or &&? A comment can't hurt.


&&

Hi Breno,

You can add a comment like the following:

If either _F_HASH_REPORT or _F_RSS are negotiated, the device provides
hash calculation capabilities, that is, hash_key can be configured.

Regards,
Heng




+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+   update = true;
+   }
  
-	virtnet_commit_rss_command(vi);

+   if (update)
+   virtnet_commit_rss_command(vi);
  
  	return 0;

  }
@@ -4729,13 +4741,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
  
-	if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))

+   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
  
-	if (vi->has_rss || vi->has_rss_hash_report) {

vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+   }
+
+   if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, 
rss_max_key_size));
  
--

2.43.0





Re: [PATCH net v3] virtio_net: Do not send RSS key if it is not supported

2024-03-31 Thread Heng Qi




在 2024/3/30 上午1:16, Breno Leitao 写道:

There is a bug when setting the RSS options in virtio_net that can break
the whole machine, getting the kernel into an infinite loop.

Running the following command in any QEMU virtual machine with virtionet
will reproduce this problem:

 # ethtool -X eth0  hfunc toeplitz

This is how the problem happens:

1) ethtool_set_rxfh() calls virtnet_set_rxfh()

2) virtnet_set_rxfh() calls virtnet_commit_rss_command()

3) virtnet_commit_rss_command() populates 4 entries for the rss
scatter-gather

4) Since the command above does not have a key, then the last
scatter-gatter entry will be zeroed, since rss_key_size == 0.
sg_buf_size = vi->rss_key_size;

5) This buffer is passed to qemu, but qemu is not happy with a buffer
with zero length, and do the following in virtqueue_map_desc() (QEMU
function):

   if (!sz) {
   virtio_error(vdev, "virtio: zero sized buffers are not allowed");

6) virtio_error() (also QEMU function) set the device as broken

 vdev->broken = true;

7) Qemu bails out, and do not repond this crazy kernel.

8) The kernel is waiting for the response to come back (function
virtnet_send_command())

9) The kernel is waiting doing the following :

   while (!virtqueue_get_buf(vi->cvq, ) &&
 !virtqueue_is_broken(vi->cvq))
  cpu_relax();

10) None of the following functions above is true, thus, the kernel
loops here forever. Keeping in mind that virtqueue_is_broken() does
not look at the qemu `vdev->broken`, so, it never realizes that the
vitio is broken at QEMU side.

Fix it by not sending RSS commands if the feature is not available in
the device.

Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.")
Cc: sta...@vger.kernel.org
Cc: qemu-de...@nongnu.org
Signed-off-by: Breno Leitao 
---
Changelog:

V2:
   * Moved from creating a valid packet, by rejecting the request
 completely
V3:
   * Got some good feedback from and Xuan Zhuo and Heng Qi, and reworked
 the rejection path.

---
  drivers/net/virtio_net.c | 22 ++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..c4a21ec51adf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
  {
struct virtnet_info *vi = netdev_priv(dev);
+   bool update = false;
int i;
  
  	if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&

@@ -3814,13 +3815,24 @@ static int virtnet_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
  
  	if (rxfh->indir) {

+   if (!vi->has_rss)
+   return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+   update = true;
}
-   if (rxfh->key)
+
+   if (rxfh->key) {
+   if (!vi->has_rss && !vi->has_rss_hash_report)
+   return -EOPNOTSUPP;
+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+   update = true;
+   }
  
-	virtnet_commit_rss_command(vi);

+   if (update)
+   virtnet_commit_rss_command(vi);
  
  	return 0;

  }
@@ -4729,13 +4741,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
  
-	if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))

+   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
  
-	if (vi->has_rss || vi->has_rss_hash_report) {

vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+   }
+
+   if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, 
rss_max_key_size));
  


Reviewed-by: Heng Qi 

Thanks.






Re: [PATCH net v2 1/2] virtio_net: Do not set rss_indir if RSS is not supported

2024-03-28 Thread Heng Qi




在 2024/3/28 下午10:37, Breno Leitao 写道:

On Wed, Mar 27, 2024 at 09:37:43AM +0800, Xuan Zhuo wrote:

On Tue, 26 Mar 2024 08:19:08 -0700, Breno Leitao  wrote:

Do not set virtnet_info->rss_indir_table_size if RSS is not available
for the device.

Currently, rss_indir_table_size is set if either has_rss or
has_rss_hash_report is available, but, it should only be set if has_rss
is set.

On the virtnet_set_rxfh(), return an invalid command if the request has
indirection table set, but virtnet does not support RSS.

Suggested-by: Heng Qi 
Signed-off-by: Breno Leitao 
---
  drivers/net/virtio_net.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..c640fdf28fc5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3813,6 +3813,9 @@ static int virtnet_set_rxfh(struct net_device *dev,
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;

+   if (rxfh->indir && !vi->has_rss)
+   return -EINVAL;
+
if (rxfh->indir) {

Put !vi->has_rss here?

I am not sure I understand the suggestion. Where do you suggest we have
!vi->has_rss?

If we got this far, we either have:

a) rxfh->indir set and vi->has_rss is also set
b) rxfh->indir not set. (vi->has_rss could be set or not).


This function does two tasks.
1. update indir table
2. update rss key

#1 only for has_rss
#2 for has_rss or has_rss_hash_report


So I would code:

bool update = false

if (rxfh->indir) {
if (!vi->has_rss)
return -EINVAL;

for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];

update = true
}

if (rxfh->key) {
if (!vi->has_rss && !vi->has_rss_hash_report)
return -EINVAL;

memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
update = true
}


if (update)
virtnet_commit_rss_command(vi);

Thanks.

This is a bit different from the previous proposal, but, I like this one
approach better. It makes the code easier to read.

How does the full patch looks like?


diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..180f342f1898 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,20 +3807,35 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
  {
struct virtnet_info *vi = netdev_priv(dev);
+   bool update = false;
int i;
  
+	if (!vi->has_rss && !vi->has_rss_hash_report)

+   return -EOPNOTSUPP;
+


I think this two modifications are no longer needed as they have been 
embedded below.



if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
  
  	if (rxfh->indir) {

+   if (!vi->has_rss)
+   return -EINVAL;


-EOPNOTSUPP seems better.


+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+   update = true;
}
-   if (rxfh->key)
+
+   if (rxfh->key) {
+   if (!vi->has_rss && !vi->has_rss_hash_report)
+   return -EINVAL;


Same as above. return -EOPNOTSUPP.

Others look good.

Thanks.


+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+   update = true;
+   }
  
-	virtnet_commit_rss_command(vi);

+   if (update)
+   virtnet_commit_rss_command(vi);
  
  	return 0;

  }
@@ -4729,13 +4744,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
  
-	if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))

+   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
  
-	if (vi->has_rss || vi->has_rss_hash_report) {

vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+   }
+
+   if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, 
rss_max_key_size));
  





Re: [PATCH net v2 2/2] virtio_net: Do not send RSS key if it is not supported

2024-03-26 Thread Heng Qi




在 2024/3/26 下午11:19, Breno Leitao 写道:

There is a bug when setting the RSS options in virtio_net that can break
the whole machine, getting the kernel into an infinite loop.

Running the following command in any QEMU virtual machine with virtionet
will reproduce this problem:

 # ethtool -X eth0  hfunc toeplitz

This is how the problem happens:

1) ethtool_set_rxfh() calls virtnet_set_rxfh()

2) virtnet_set_rxfh() calls virtnet_commit_rss_command()

3) virtnet_commit_rss_command() populates 4 entries for the rss
scatter-gather

4) Since the command above does not have a key, then the last
scatter-gatter entry will be zeroed, since rss_key_size == 0.
sg_buf_size = vi->rss_key_size;

5) This buffer is passed to qemu, but qemu is not happy with a buffer
with zero length, and do the following in virtqueue_map_desc() (QEMU
function):

   if (!sz) {
   virtio_error(vdev, "virtio: zero sized buffers are not allowed");

6) virtio_error() (also QEMU function) set the device as broken

 vdev->broken = true;

7) Qemu bails out, and do not repond this crazy kernel.

8) The kernel is waiting for the response to come back (function
virtnet_send_command())

9) The kernel is waiting doing the following :

   while (!virtqueue_get_buf(vi->cvq, ) &&
 !virtqueue_is_broken(vi->cvq))
  cpu_relax();

10) None of the following functions above is true, thus, the kernel
loops here forever. Keeping in mind that virtqueue_is_broken() does
not look at the qemu `vdev->broken`, so, it never realizes that the
vitio is broken at QEMU side.

Fix it by not sending RSS commands if the feature is not available in
the device.

Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.")
Cc: sta...@vger.kernel.org
Cc: qemu-de...@nongnu.org
Signed-off-by: Breno Leitao 
---
  drivers/net/virtio_net.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c640fdf28fc5..e6b0eaf08ac2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3809,6 +3809,9 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct virtnet_info *vi = netdev_priv(dev);
int i;
  
+	if (!vi->has_rss && !vi->has_rss_hash_report)

+   return -EOPNOTSUPP;
+


Why not make the second patch as the first, this seems to work better.
Or squash them into one patch.

Apart from these and Xuan's comments.

For series:

        Reviewed-by: Heng Qi 

Regards,
Heng


if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;





Re: [PATCH] virtio_net: Do not send RSS key if it is not supported

2024-03-25 Thread Heng Qi
 virtio_cread16(vdev, offsetof(struct 
virtio_net_config,

    rss_max_indirection_table_length));
+   }
+
+   if (vi->has_rss || vi->has_rss_hash_report) {
    vi->rss_key_size =
    virtio_cread8(vdev, offsetof(struct 
virtio_net_config, rss_max_key_size));



Regards,
Heng



@Heng Qi

Could you help us?

Thanks.



if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;

Thanks!





Re: [RFC PATCH 0/2] net: provides dim profile fine-tuning channels

2024-03-14 Thread Heng Qi




在 2024/3/15 上午2:49, Jakub Kicinski 写道:

On Thu, 14 Mar 2024 21:09:31 +0800 Heng Qi wrote:

The NetDIM library provides excellent acceleration for many modern
network cards. However, the default profiles of DIM limits its maximum
capabilities for different NICs, so providing a channel through which
the NIC can be custom configured is necessary.

Given that DIM is currently enabled and disable via ethtool
why are you putting the API is sysfs and ops in ndo?


Hi Jakub,

Thank you for reaching out. We're flexible regarding configuration 
methods and are
open to using either sysfs or ethtool, depending on what's most 
appropriate for the task at hand.


If the ethtool is favored, I am happy to proceed with it!

Best regards,
Heng




[RFC PATCH 1/2] net: add sysfs attributes for customized dim profile management

2024-03-14 Thread Heng Qi
The NetDIM library, currently leveraged by an array of NICs, delivers
excellent acceleration benefits. Nevertheless, NICs vary significantly
in their dim profile list prerequisites.

Specifically, virtio-net backends may present diverse sw or hw device
implementation, making a one-size-fits-all parameter list impractical.
On Alibaba Cloud, the virtio DPU's performance under the default DIM
profile falls short of expectations, partly due to a mismatch in
parameter configuration.

I also noticed that ice/idpf/ena and other NICs have customized
profilelist or placed some restrictions on dim capabilities.

Motivated by this, I tried adding new sysfs attributes that provides
a per-device control to modify and access a device's interrupt parameters.

Usage

1. Query the currently customized list of the device

$ cat dim_profs
The profiles of (RX, EQE):
{.usec =   1, .pkts = 256, .comps =   0,},
{.usec =   8, .pkts = 256, .comps =   0,},
{.usec =  64, .pkts = 256, .comps =   0,},
{.usec = 128, .pkts = 256, .comps =   0,},
{.usec = 256, .pkts = 256, .comps =   0,}
The profiles of (TX, EQE):
{.usec =   1, .pkts = 256, .comps =   0,},
{.usec =   2, .pkts = 256, .comps =   0,},
{.usec =   3, .pkts = 256, .comps =   0,},
{.usec =   4, .pkts = 256, .comps =   0,},
{.usec =   5, .pkts = 256, .comps =   0,}

2. Tune

$ echo "RX EQE 8,8,0 16,16,0 32,32,0 64,64,0 128,128,0" > dim_profs
$ echo "  TX  EQE 0,2,0   1,3,0 2,4,0   3,5,0  4,6,0   " > dim_profs
$ cat dim_profs
The profiles of (RX, EQE):
{.usec =   8, .pkts =   8, .comps =   0,},
{.usec =  16, .pkts =  16, .comps =   0,},
{.usec =  32, .pkts =  32, .comps =   0,},
{.usec =  64, .pkts =  64, .comps =   0,},
{.usec = 128, .pkts = 128, .comps =   0,}
The profiles of (TX, EQE):
{.usec =   0, .pkts =   2, .comps =   0,},
{.usec =   1, .pkts =   3, .comps =   0,},
{.usec =   2, .pkts =   4, .comps =   0,},
{.usec =   3, .pkts =   5, .comps =   0,},
{.usec =   4, .pkts =   6, .comps =   0,}

3. Warn
If the device does not support .ndo_dim_moder_{set, get},
the following warning will response:
"Profile is default and not customized by the device."

Signed-off-by: Heng Qi 
---
 Documentation/ABI/testing/sysfs-class-net |  17 +++
 include/linux/dim.h   |   7 ++
 include/linux/netdevice.h |  35 ++
 lib/dim/net_dim.c |   6 --
 net/core/net-sysfs.c  | 172 ++
 5 files changed, 231 insertions(+), 6 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net 
b/Documentation/ABI/testing/sysfs-class-net
index ebf21be..1e4faa8 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -352,3 +352,20 @@ Description:
0  threaded mode disabled for this dev
1  threaded mode enabled for this dev
== ==
+
+What:  /sys/class/net//dim_profs
+Date:  Mar 2024
+KernelVersion: 6.8
+Contact:   net...@vger.kernel.org
+Description:
+   String value to control the profile list of DIM per device. 
User could
+   set this value to tune the profile list for RX/TX direction and 
EQE/CQE
+   mode respectively.
+
+   Possible values:
+    
==
+   RX EQE 1,1,0  2,2,0   3,3,0   4,4,05,5,0 tune RX + EQE 
profile list
+   RX CQE 8,8,0  16,16,0 32,32,0 64,64,0  128,128,0 tune RX + CQE 
profile list
+   TX EQE 16,8,0 2,16,0  16,8,0  32,64,0  128,64,0  tune TX + EQE 
profile list
+   TX CQE 8,5,0  8,16,0  32,12,0 128,64,0 256,128,0 tune TX + CQE 
profile list
+    
==
diff --git a/include/linux/dim.h b/include/linux/dim.h
index f343bc9..43398f5 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -10,6 +10,13 @@
 #include 
 #include 
 
+/* Number of DIM profiles and period mode. */
+#define NET_DIM_PARAMS_NUM_PROFILES 5
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
+#define NET_DIM_DEF_PROFILE_CQE 1
+#define NET_DIM_DEF_PROFILE_EQE 1
+
 /*
  * Number of events between DIM iterations.
  * Causes a moderation of the algorithm run.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6f6ac7..bc2f3ac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -49,6 +49,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -998,6 +999,27 @@ struct netdev_net_notifier {
struct notifier_block *nb;
 };
 
+enum dim_direction {
+   DIM_RX_DIRECTION = 0x0,
+   DIM_TX_DIRECTION = 0x1,
+   DIM_NUM_DIRECTIONS
+};
+/**
+ * struct dim_profs_list - Structure for dim sysfs configuration.
+ * Used to exc

[RFC PATCH 0/2] net: provides dim profile fine-tuning channels

2024-03-14 Thread Heng Qi
The NetDIM library provides excellent acceleration for many modern
network cards. However, the default profiles of DIM limits its maximum
capabilities for different NICs, so providing a channel through which
the NIC can be custom configured is necessary.

Please review, thank you very much!

Heng Qi (2):
  net: add sysfs attributes for customized dim profile management
  virtio-net: support net sysfs to fine-tune dim profile

 Documentation/ABI/testing/sysfs-class-net |  17 +++
 drivers/net/virtio_net.c  |  64 ++-
 include/linux/dim.h   |   7 ++
 include/linux/netdevice.h |  35 ++
 lib/dim/net_dim.c |   6 --
 net/core/net-sysfs.c  | 172 ++
 6 files changed, 294 insertions(+), 7 deletions(-)

-- 
1.8.3.1




[RFC PATCH 2/2] virtio-net: support net sysfs to fine-tune dim profile

2024-03-14 Thread Heng Qi
Virtio-net has different types of back-end device
implementations. In order to effectively optimize
the dim library's gains for different device
implementations, let's use the interface provided
by net-sysfs to fine-tune the profile list.

Signed-off-by: Heng Qi 
---
 drivers/net/virtio_net.c | 64 +++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e709d44..7fae737 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,16 @@
 
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
+/* This is copied from NET_DIM_RX_EQE_PROFILES in DIM library */
+#define VIRTNET_DIM_RX_PKTS 256
+static struct dim_cq_moder rx_itr_conf[] = {
+   {.usec = 1,   .pkts = VIRTNET_DIM_RX_PKTS,},
+   {.usec = 8,   .pkts = VIRTNET_DIM_RX_PKTS,},
+   {.usec = 64,  .pkts = VIRTNET_DIM_RX_PKTS,},
+   {.usec = 128, .pkts = VIRTNET_DIM_RX_PKTS,},
+   {.usec = 256, .pkts = VIRTNET_DIM_RX_PKTS,}
+};
+
 static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6,
@@ -3584,7 +3594,10 @@ static void virtnet_rx_dim_work(struct work_struct *work)
if (!rq->dim_enabled)
continue;
 
-   update_moder = net_dim_get_rx_moderation(dim->mode, 
dim->profile_ix);
+   if (dim->profile_ix >= ARRAY_SIZE(rx_itr_conf))
+   dim->profile_ix = ARRAY_SIZE(rx_itr_conf) - 1;
+
+   update_moder = rx_itr_conf[dim->profile_ix];
if (update_moder.usec != rq->intr_coal.max_usecs ||
update_moder.pkts != rq->intr_coal.max_packets) {
err = virtnet_send_rx_ctrl_coal_vq_cmd(vi, qnum,
@@ -4170,6 +4183,53 @@ static void virtnet_tx_timeout(struct net_device *dev, 
unsigned int txqueue)
   jiffies_to_usecs(jiffies - READ_ONCE(txq->trans_start)));
 }
 
+static int virtnet_dim_moder_valid(struct net_device *dev, struct 
dim_profs_list *list)
+{
+   struct virtnet_info *vi = netdev_priv(dev);
+
+   if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
+   return -EOPNOTSUPP;
+
+   if (!list || list->direction != DIM_RX_DIRECTION ||
+   list->num != NET_DIM_PARAMS_NUM_PROFILES ||
+   list->mode != DIM_CQ_PERIOD_MODE_START_FROM_EQE) {
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int virtnet_dim_moder_get(struct net_device *dev, struct dim_profs_list 
*list)
+{
+   int ret;
+
+   ret = virtnet_dim_moder_valid(dev, list);
+   if (ret)
+   return ret;
+
+   memcpy(list->profs, rx_itr_conf, sizeof(*list->profs) * list->num);
+
+   return 0;
+}
+
+static int virtnet_dim_moder_set(struct net_device *dev, struct dim_profs_list 
*list)
+{
+   int i, ret;
+
+   ret = virtnet_dim_moder_valid(dev, list);
+   if (ret)
+   return ret;
+
+   for (i = 0; i < list->num; i++) {
+   rx_itr_conf[i].usec = list->profs[i].usec;
+   rx_itr_conf[i].pkts = list->profs[i].pkts;
+   if (list->profs[i].comps)
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static const struct net_device_ops virtnet_netdev = {
.ndo_open= virtnet_open,
.ndo_stop= virtnet_close,
@@ -4186,6 +4246,8 @@ static void virtnet_tx_timeout(struct net_device *dev, 
unsigned int txqueue)
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features   = virtnet_set_features,
.ndo_tx_timeout = virtnet_tx_timeout,
+   .ndo_dim_moder_get  = virtnet_dim_moder_get,
+   .ndo_dim_moder_set  = virtnet_dim_moder_set,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
-- 
1.8.3.1




Re: [PATCH v2 2/3] virtio_net: Add missing virtio header in skb for XDP_PASS

2024-01-24 Thread Heng Qi




在 2024/1/24 下午4:57, Liang Chen 写道:

For the XDP_PASS scenario of the XDP path, the skb constructed with
xdp_buff does not include the virtio header. Adding the virtio header
information back when creating the skb.

Signed-off-by: Liang Chen 
---
  drivers/net/virtio_net.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b56828804e5f..2de46eb4c661 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1270,6 +1270,9 @@ static struct sk_buff *receive_small_xdp(struct 
net_device *dev,
if (unlikely(!skb))
goto err;
  
+	/* Store the original virtio header for subsequent use by the driver. */

+   memcpy(skb_vnet_common_hdr(skb), _xdp.hdr, vi->hdr_len);


If xdp push or xdp pull modifies xdp_buff, will the original header 
still apply to the modified data?


Thanks,
Heng


+
if (metasize)
skb_metadata_set(skb, metasize);
  
@@ -1635,6 +1638,9 @@ static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,

head_skb = build_skb_from_xdp_buff(dev, vi, xdp, 
xdp_frags_truesz);
if (unlikely(!head_skb))
break;
+   /* Store the original virtio header for subsequent use by the 
driver. */
+   memcpy(skb_vnet_common_hdr(head_skb), _xdp.hdr, 
vi->hdr_len);
+
return head_skb;
  
  	case XDP_TX:





Re: [PATCH] virtio_net: Support RX hash XDP hint

2024-01-22 Thread Heng Qi

Hi Liang Chen,

在 2024/1/22 下午6:22, Liang Chen 写道:

The RSS hash report is a feature that's part of the virtio specification.
Currently, virtio backends like qemu, vdpa (mlx5), and potentially vhost
(still a work in progress as per [1]) support this feature. While the
capability to obtain the RSS hash has been enabled in the normal path,
it's currently missing in the XDP path. Therefore, we are introducing XDP
hints through kfuncs to allow XDP programs to access the RSS hash.

Signed-off-by: Liang Chen 
---
  drivers/net/virtio_net.c | 56 
  1 file changed, 56 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d7ce4a1011ea..1463a4709e3c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4579,6 +4579,60 @@ static void virtnet_set_big_packets(struct virtnet_info 
*vi, const int mtu)
}
  }
  
+static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,

+  enum xdp_rss_hash_type *rss_type)
+{
+   const struct xdp_buff *xdp = (void *)_ctx;
+   struct virtio_net_hdr_v1_hash *hdr_hash;
+   struct virtnet_info *vi;
+
+   if (!(xdp->rxq->dev->features & NETIF_F_RXHASH))


I think 'vi->has_rss_hash_report' should be used here.
NETIF_F_RXHASH cannot guarantee that the hash report feature is negotiated,
and accessing hash_report and hash_value is unsafe at this time.


+   return -ENODATA;
+
+   vi = netdev_priv(xdp->rxq->dev);
+   hdr_hash = (struct virtio_net_hdr_v1_hash *)(xdp->data - vi->hdr_len);


If the virtio-net-hdr is overrided by the XDP prog, how can this be done 
correctly and as expected?


Thanks,
Heng


+
+   switch (__le16_to_cpu(hdr_hash->hash_report)) {
+   case VIRTIO_NET_HASH_REPORT_TCPv4:
+   *rss_type = XDP_RSS_TYPE_L4_IPV4_TCP;
+   break;
+   case VIRTIO_NET_HASH_REPORT_UDPv4:
+   *rss_type = XDP_RSS_TYPE_L4_IPV4_UDP;
+   break;
+   case VIRTIO_NET_HASH_REPORT_TCPv6:
+   *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP;
+   break;
+   case VIRTIO_NET_HASH_REPORT_UDPv6:
+   *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP;
+   break;
+   case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
+   *rss_type = XDP_RSS_TYPE_L4_IPV6_TCP_EX;
+   break;
+   case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
+   *rss_type = XDP_RSS_TYPE_L4_IPV6_UDP_EX;
+   break;
+   case VIRTIO_NET_HASH_REPORT_IPv4:
+   *rss_type = XDP_RSS_TYPE_L3_IPV4;
+   break;
+   case VIRTIO_NET_HASH_REPORT_IPv6:
+   *rss_type = XDP_RSS_TYPE_L3_IPV6;
+   break;
+   case VIRTIO_NET_HASH_REPORT_IPv6_EX:
+   *rss_type = XDP_RSS_TYPE_L3_IPV6_EX;
+   break;
+   case VIRTIO_NET_HASH_REPORT_NONE:
+   default:
+   *rss_type = XDP_RSS_TYPE_NONE;
+   }
+
+   *hash = __le32_to_cpu(hdr_hash->hash_value);
+   return 0;
+}
+
+static const struct xdp_metadata_ops virtnet_xdp_metadata_ops = {
+   .xmo_rx_hash= virtnet_xdp_rx_hash,
+};
+
  static int virtnet_probe(struct virtio_device *vdev)
  {
int i, err = -ENOMEM;
@@ -4613,6 +4667,8 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->ethtool_ops = _ethtool_ops;
SET_NETDEV_DEV(dev, >dev);
  
+	dev->xdp_metadata_ops = _xdp_metadata_ops;

+
/* Do we support "hardware" checksums? */
if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
/* This opens up the world of extra features. */