Mellanox ConnectX-3 dual port SRIOV NICs present a bit of a challenge
when assigning one of their VFs to a guest using VFIO device
assignment.

These NICs have only a single PCI PF device, and that single PF has
two netdevs sharing the single PCI address - one for port 1 and one
for port 2. When a VF is created it can also have 2 netdevs, or it can
be set up in "single port" mode, where the VF has only a single netdev,
and that netdev is connected either to port 1 or to port 2.

When the VF is created in dual port mode, you get/set the MAC
address/vlan tag for the port 1 VF by sending a netlink message to the
PF's port1 netdev, and you get/set the MAC address/vlan tag for the
port 2 VF by sending a netlink message to the PF's port 2 netdev. (Of
course libvirt doesn't have any way to describe MAC/vlan info for 2
ports in a single hostdev interface, so that's a bit of a moot point)

When the VF is created in single port mode, you can *set* the MAC/vlan
info by sending a netlink message to *either* PF netdev - the driver
is smart enough to understand that there's only a single netdev, and
set the MAC/vlan for that netdev. When you want to *get* it, however,
the driver is more accurate - it will return 00:00:00:00:00:00 for the
MAC if you request it from the port 1 PF netdev when the VF was
configured to be single port on port 2, or if you request it from the
port 2 PF netdev when the VF was configured to be single port on port
1.

Based on this information, when *getting* the MAC/vlan info (to save
the original setting prior to assignment), we determine the correct PF
netdev by reading the phys_port_id of the VF's netdev and selecting
the PF netdev that has the same phys_port_id.

(IMPORTANT NOTE: this implies that to do PCI device assignment of the
VFs on dual port Mellanox cards using <interface type='hostdev'>
(i.e. if you want the MAC address/vlan tag to be set), not only must
the VFs be configured in single port mode, but also the VFs *must* be
bound to the host VF net driver, and libvirt must use managed='yes')

By the time libvirt is ready to set the new MAC/vlan tag, the VF has
already been unbound from the host net driver and bound to
vfio-pci. This isn't problematic though because, as stated earlier,
when a VF is created in single port mode, commands to configure it can
be sent to either the port 1 PF netdev or the port 2 PF netdev.

When it is time to restore the original MAC/vlan tag, again the VF
will *not* be bound to a host net driver, so it won't be possible to
learn from sysfs whether to use the port 1 or port 2 PF netdev for the
netlink commands. And again, it doesn't matter which netdev you
use. However, we must keep in mind that we saved the original settings
to a file called "${PF}_${VFNUM}". To solve this problem, we just
check for the existence of ${PF1}_${VFNUM} and ${PF2}_${VFNUM}, and
use whichever one we find (since we know that only one can be there).
---

New in V2

 src/util/virhostdev.c | 27 +++++++++++++++++++++------
 src/util/virpci.c     | 31 +++++++++++++++++++++++++++++--
 src/util/virpci.h     |  4 +++-
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c
index 580f0fac0..102fd85c1 100644
--- a/src/util/virhostdev.c
+++ b/src/util/virhostdev.c
@@ -307,7 +307,9 @@ virHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev)
 
 
 static int
-virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
+virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
+                    int pfNetDevIdx,
+                    char **linkdev,
                     int *vf)
 {
     int ret = -1;
@@ -317,9 +319,10 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
         return ret;
 
     if (virPCIIsVirtualFunction(sysfs_path) == 1) {
-        if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev,
-                                         vf) < 0)
+        if (virPCIGetVirtualFunctionInfo(sysfs_path, pfNetDevIdx,
+                                         linkdev, vf) < 0) {
             goto cleanup;
+        }
     } else {
         /* In practice this should never happen, since we currently
          * only support assigning SRIOV VFs via <interface
@@ -444,7 +447,7 @@ virHostdevSaveNetConfig(virDomainHostdevDefPtr hostdev,
         goto cleanup;
     }
 
-    if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
+    if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0)
         goto cleanup;
 
     if (virNetDevSaveNetConfig(linkdev, vf, stateDir, true) < 0)
@@ -482,7 +485,7 @@ virHostdevSetNetConfig(virDomainHostdevDefPtr hostdev,
     if (!virHostdevIsPCINetDevice(hostdev))
         return 0;
 
-    if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
+    if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0)
         goto cleanup;
 
     vlan = virDomainNetGetActualVlan(hostdev->parent.data.net);
@@ -545,7 +548,7 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev,
         return ret;
     }
 
-    if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
+    if (virHostdevNetDevice(hostdev, 0, &linkdev, &vf) < 0)
         return ret;
 
     virtPort = virDomainNetGetActualVirtPortProfile(
@@ -565,6 +568,18 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev,
             ret = virNetDevReadNetConfig(linkdev, vf, oldStateDir,
                                          &adminMAC, &vlan, &MAC);
 
+        if (ret < 0) {
+            /* see if the config was saved using the PF's "port 2"
+             * netdev for the file name.
+             */
+            VIR_FREE(linkdev);
+
+            if (virHostdevNetDevice(hostdev, 1, &linkdev, &vf) >= 0) {
+                ret = virNetDevReadNetConfig(linkdev, vf, stateDir,
+                                             &adminMAC, &vlan, &MAC);
+            }
+        }
+
         if (ret == 0) {
             /* if a MAC was stored for the VF, we should now restore
              * that as the adminMAC. We have to do it this way because
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 62a36b380..5ded77087 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2935,10 +2935,14 @@ virPCIGetNetName(const char *device_link_sysfs_path,
 
 int
 virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
-                             char **pfname, int *vf_index)
+                             int pfNetDevIdx,
+                             char **pfname,
+                             int *vf_index)
 {
     virPCIDeviceAddressPtr pf_config_address = NULL;
     char *pf_sysfs_device_path = NULL;
+    char *vfname = NULL;
+    char *vfPhysPortID = NULL;
     int ret = -1;
 
     if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 
0)
@@ -2957,8 +2961,28 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
         goto cleanup;
     }
 
-    if (virPCIGetNetName(pf_sysfs_device_path, 0, NULL, pfname) < 0)
+    /* If the caller hasn't asked for a specific pfNetDevIdx, and VF
+     * is bound to a netdev, learn that netdev's phys_port_id (if
+     * available). This can be used to disambiguate when the PF has
+     * multiple netdevs. If the VF isn't bound to a netdev, then we
+     * return netdev[pfNetDevIdx] on the PF, which may or may not be
+     * correct.
+     */
+    if (pfNetDevIdx == -1) {
+        if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
+            goto cleanup;
+
+        if (vfname) {
+            if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0)
+                goto cleanup;
+        }
+        pfNetDevIdx = 0;
+    }
+
+    if (virPCIGetNetName(pf_sysfs_device_path,
+                         pfNetDevIdx, vfPhysPortID, pfname) < 0) {
         goto cleanup;
+    }
 
     if (!*pfname) {
         /* this shouldn't be possible. A VF can't exist unless its
@@ -2974,6 +2998,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
  cleanup:
     VIR_FREE(pf_config_address);
     VIR_FREE(pf_sysfs_device_path);
+    VIR_FREE(vfname);
+    VIR_FREE(vfPhysPortID);
 
     return ret;
 }
@@ -3044,6 +3070,7 @@ virPCIGetNetName(const char *device_link_sysfs_path ATTRIBUTE_UNUSED,
 
 int
 virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED,
+                             int pfNetDevIdx ATTRIBUTE_UNUSED,
                              char **pfname ATTRIBUTE_UNUSED,
                              int *vf_index ATTRIBUTE_UNUSED)
 {
diff --git a/src/util/virpci.h b/src/util/virpci.h
index adf336706..f1fbe39e6 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -226,7 +226,9 @@ int virPCIGetAddrString(unsigned int domain,
 int virPCIDeviceAddressParse(char *address, virPCIDeviceAddressPtr bdf);
 
 int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
-                                 char **pfname, int *vf_index);
+                                 int pfNetDevIdx,
+                                 char **pfname,
+                                 int *vf_index);
 
 int virPCIDeviceUnbind(virPCIDevicePtr dev);
 int virPCIDeviceRebind(virPCIDevicePtr dev);
-- 
2.13.3

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to