The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/6788
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Includes https://github.com/lxc/lxd/pull/6768
From c04da5d4aac94bc859d76a8b355cb09a0d4f3603 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Thu, 23 Jan 2020 17:20:06 +0000 Subject: [PATCH 01/15] lxd/container/lxc: Removes VM specific NIC config ignoring As no longer returned for containers. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/container_lxc.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 895ca69ee9..0a4497f47e 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -2171,10 +2171,6 @@ func (c *containerLXC) startCommon() (string, []func() error, error) { } for _, nicItem := range runConf.NetworkInterface { - if nicItem.Key == "devName" { - // Skip internal device name key, not used by liblxc. - continue - } err = lxcSetConfigItem(c.c, fmt.Sprintf("%s.%d.%s", networkKeyPrefix, nicID, nicItem.Key), nicItem.Value) if err != nil { return "", postStartHooks, errors.Wrapf(err, "Failed to setup device network interface '%s'", dev.Name) From ac6520f5ad03e4a2a89309060bb8de8cf0eccf0d Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Thu, 23 Jan 2020 17:20:56 +0000 Subject: [PATCH 02/15] lxd/device: Only return devName NIC config item for VMs Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/infiniband_physical.go | 8 +++++++- lxd/device/infiniband_sriov.go | 1 - lxd/device/nic_bridged.go | 7 ++++--- lxd/device/nic_macvlan.go | 7 ++++--- lxd/device/nic_p2p.go | 7 ++++--- lxd/device/nic_physical.go | 8 +++++++- lxd/device/nic_sriov.go | 1 - 7 files changed, 26 insertions(+), 13 deletions(-) diff --git a/lxd/device/infiniband_physical.go b/lxd/device/infiniband_physical.go index 6c6947b5bc..482f39d817 100644 --- a/lxd/device/infiniband_physical.go +++ b/lxd/device/infiniband_physical.go @@ -116,13 +116,19 @@ func (d *infinibandPhysical) Start() (*deviceConfig.RunConfig, error) { } runConf.NetworkInterface = 
[]deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, {Key: "link", Value: saveData["host_name"]}, } + if d.inst.Type() == instancetype.VM { + runConf.NetworkInterface = append(runConf.NetworkInterface, + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + }...) + } + return &runConf, nil } diff --git a/lxd/device/infiniband_sriov.go b/lxd/device/infiniband_sriov.go index cb673e5ee5..b6932e9753 100644 --- a/lxd/device/infiniband_sriov.go +++ b/lxd/device/infiniband_sriov.go @@ -138,7 +138,6 @@ func (d *infinibandSRIOV) Start() (*deviceConfig.RunConfig, error) { } runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, diff --git a/lxd/device/nic_bridged.go b/lxd/device/nic_bridged.go index dd1e534799..7633dac620 100644 --- a/lxd/device/nic_bridged.go +++ b/lxd/device/nic_bridged.go @@ -176,7 +176,6 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, @@ -185,8 +184,10 @@ func (d *nicBridged) Start() (*deviceConfig.RunConfig, error) { if d.inst.Type() == instancetype.VM { runConf.NetworkInterface = append(runConf.NetworkInterface, - deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]}, - ) + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + {Key: "hwaddr", Value: d.config["hwaddr"]}, + }...) 
} return &runConf, nil diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go index 43cb93a651..131f5d8bdc 100644 --- a/lxd/device/nic_macvlan.go +++ b/lxd/device/nic_macvlan.go @@ -134,7 +134,6 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, @@ -143,8 +142,10 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) { if d.inst.Type() == instancetype.VM { runConf.NetworkInterface = append(runConf.NetworkInterface, - deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]}, - ) + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + {Key: "hwaddr", Value: d.config["hwaddr"]}, + }...) } revert.Success() diff --git a/lxd/device/nic_p2p.go b/lxd/device/nic_p2p.go index 0bfa97462f..2043597110 100644 --- a/lxd/device/nic_p2p.go +++ b/lxd/device/nic_p2p.go @@ -97,7 +97,6 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, @@ -106,8 +105,10 @@ func (d *nicP2P) Start() (*deviceConfig.RunConfig, error) { if d.inst.Type() == instancetype.VM { runConf.NetworkInterface = append(runConf.NetworkInterface, - deviceConfig.RunConfigItem{Key: "hwaddr", Value: d.config["hwaddr"]}, - ) + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + {Key: "hwaddr", Value: d.config["hwaddr"]}, + }...) 
} return &runConf, nil diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go index 54d98c6a0d..5a043db9e0 100644 --- a/lxd/device/nic_physical.go +++ b/lxd/device/nic_physical.go @@ -111,13 +111,19 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, {Key: "link", Value: saveData["host_name"]}, } + if d.inst.Type() == instancetype.VM { + runConf.NetworkInterface = append(runConf.NetworkInterface, + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + }...) + } + return &runConf, nil } diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go index b9fb4a8f30..d57c52604c 100644 --- a/lxd/device/nic_sriov.go +++ b/lxd/device/nic_sriov.go @@ -113,7 +113,6 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{} runConf.NetworkInterface = []deviceConfig.RunConfigItem{ - {Key: "devName", Value: d.name}, {Key: "name", Value: d.config["name"]}, {Key: "type", Value: "phys"}, {Key: "flags", Value: "up"}, From 92208250966e5d8ce179799a190ec97010d8c1b6 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Thu, 23 Jan 2020 17:50:22 +0000 Subject: [PATCH 03/15] lxd/device/nic/physical: Improves revert and deletion of created VLAN devices Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_physical.go | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go index 5a043db9e0..e8470de7ef 100644 --- a/lxd/device/nic_physical.go +++ b/lxd/device/nic_physical.go @@ -5,6 +5,7 @@ import ( deviceConfig "github.com/lxc/lxd/lxd/device/config" "github.com/lxc/lxd/lxd/instance/instancetype" + 
"github.com/lxc/lxd/lxd/revert" "github.com/lxc/lxd/shared" ) @@ -62,6 +63,9 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { saveData := make(map[string]string) + revert := revert.New() + defer revert.Fail() + // Record the host_name device used for restoration later. saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"]) @@ -72,16 +76,15 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { // Record whether we created this device or not so it can be removed on stop. saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing") - // If we return from this function with an error, ensure we clean up created device. - defer func() { - if err != nil && statusDev == "created" { - NetworkRemoveInterface(saveData["host_name"]) - } - }() + if shared.IsTrue(saveData["last_state.created"]) { + revert.Add(func() { + NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"]) + }) + } - // If we didn't create the device we should track various properties so we can - // restore them when the instance is stopped or the device is detached. - if statusDev == "existing" { + // If we didn't create the device we should track various properties so we can restore them when the + // instance is stopped or the device is detached. + if !shared.IsTrue(saveData["last_state.created"]) { err = networkSnapshotPhysicalNic(saveData["host_name"], saveData) if err != nil { return nil, err @@ -124,6 +127,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { }...) 
} + revert.Success() return &runConf, nil } @@ -151,9 +155,18 @@ func (d *nicPhysical) postStop() error { v := d.volatileGet() hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - err := networkRestorePhysicalNic(hostName, v) - if err != nil { - return err + + // This will delete the parent interface if we created it for VLAN parent. + if shared.IsTrue(v["last_state.created"]) { + err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"]) + if err != nil { + return err + } + } else { + err := networkRestorePhysicalNic(hostName, v) + if err != nil { + return err + } } return nil From 07883a1918ce975b857b6c5ddc72efb59c8bb72d Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Thu, 23 Jan 2020 17:51:13 +0000 Subject: [PATCH 04/15] lxd/instance/drivers/driver/qemu/templates: Clarifies qemuNetdevPhysical variables Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/instance/drivers/driver_qemu_templates.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/instance/drivers/driver_qemu_templates.go b/lxd/instance/drivers/driver_qemu_templates.go index af15de4f6a..a557dc127a 100644 --- a/lxd/instance/drivers/driver_qemu_templates.go +++ b/lxd/instance/drivers/driver_qemu_templates.go @@ -220,6 +220,6 @@ var qemuNetdevPhysical = template.Must(template.New("qemuNetdevPhysical").Parse( # Network card ("{{.devName}}" device) [device "dev-lxd_{{.devName}}"] driver = "vfio-pci" -host = "{{.host}}" +host = "{{.pciSlotName}}" bootindex = "{{.bootIndex}}" `)) From 3a7188c004499cc087af9bd3939be215fb61e18a Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 09:30:12 +0000 Subject: [PATCH 05/15] lxd/device/nic/macvlan: Differentiates config parent from actual parent Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_macvlan.go | 14 +++++++------- 1 file changed, 7 insertions(+), 
7 deletions(-) diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go index 131f5d8bdc..65401ebcc9 100644 --- a/lxd/device/nic_macvlan.go +++ b/lxd/device/nic_macvlan.go @@ -67,13 +67,13 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) { saveData := make(map[string]string) // Decide which parent we should use based on VLAN setting. - parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) + actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) // Record the temporary device name used for deletion later. saveData["host_name"] = NetworkRandomDevName("mac") // Create VLAN parent device if needed. - statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], parentName, d.config["vlan"]) + statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], actualParentName, d.config["vlan"]) if err != nil { return nil, err } @@ -83,19 +83,19 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) { if shared.IsTrue(saveData["last_state.created"]) { revert.Add(func() { - NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) + NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) }) } if d.inst.Type() == instancetype.Container { // Create MACVLAN interface. - _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvlan", "mode", "bridge") + _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvlan", "mode", "bridge") if err != nil { return nil, err } } else if d.inst.Type() == instancetype.VM { // Create MACVTAP interface. 
- _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", parentName, "type", "macvtap", "mode", "bridge") + _, err = shared.RunCommand("ip", "link", "add", "dev", saveData["host_name"], "link", actualParentName, "type", "macvtap", "mode", "bridge") if err != nil { return nil, err } @@ -187,8 +187,8 @@ func (d *nicMACVLAN) postStop() error { // This will delete the parent interface if we created it for VLAN parent. if shared.IsTrue(v["last_state.created"]) { - parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) + actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) + err := NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) if err != nil { errs = append(errs, err) } From 4f4d0837f2b1264b6957809e2afd17d128999406 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 09:31:09 +0000 Subject: [PATCH 06/15] lxd/device/device/utils/network: Adds networkGetDevicePCIDevice function Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/device_utils_network.go | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go index b2fc04fbd0..a37b69aaef 100644 --- a/lxd/device/device_utils_network.go +++ b/lxd/device/device_utils_network.go @@ -783,3 +783,47 @@ func networkParsePortRange(r string) (int64, int64, error) { return base, size, nil } + +// pciDevice represents info about a PCI uevent device. +type pciDevice struct { + ID string + SlotName string + Driver string +} + +// networkGetDevicePCIDevice returns the PCI device info for a given uevent file.
+func networkGetDevicePCIDevice(ueventFilePath string) (pciDevice, error) { + dev := pciDevice{} + + file, err := os.Open(ueventFilePath) + if err != nil { + return dev, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + // Looking for something like this "PCI_SLOT_NAME=0000:05:10.0" + fields := strings.SplitN(scanner.Text(), "=", 2) + if len(fields) == 2 { + if fields[0] == "PCI_SLOT_NAME" { + dev.SlotName = fields[1] + } else if fields[0] == "PCI_ID" { + dev.ID = fields[1] + } else if fields[0] == "DRIVER" { + dev.Driver = fields[1] + } + } + } + + err = scanner.Err() + if err != nil { + return dev, err + } + + if dev.SlotName == "" { + return dev, fmt.Errorf("Device uevent file could not be parsed") + } + + return dev, nil +} From 5c97f7bd686ff02afd33d7862a9731e6b8d52eca Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 09:31:47 +0000 Subject: [PATCH 07/15] lxd/device/nic/sriov: Updates networkGetVFDevicePCISlot to use networkGetDevicePCIDevice Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_sriov.go | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go index d57c52604c..03e2a63ccb 100644 --- a/lxd/device/nic_sriov.go +++ b/lxd/device/nic_sriov.go @@ -5,7 +5,6 @@ import ( "bytes" "fmt" "io/ioutil" - "os" "os/exec" "path/filepath" "regexp" @@ -483,27 +482,13 @@ func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFunc // networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device. 
func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) { - file, err := os.Open(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID)) + ueventFile := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID) + pciDev, err := networkGetDevicePCIDevice(ueventFile) if err != nil { return "", err } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - // Looking for something like this "PCI_SLOT_NAME=0000:05:10.0" - fields := strings.SplitN(scanner.Text(), "=", 2) - if len(fields) == 2 && fields[0] == "PCI_SLOT_NAME" { - return fields[1], nil - } - } - - err = scanner.Err() - if err != nil { - return "", err - } - - return "", fmt.Errorf("PCI_SLOT_NAME not found") + return pciDev.SlotName, nil } // networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys. From bc95615a83476260c594db61660ed686c7e5e038 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 09:36:00 +0000 Subject: [PATCH 08/15] lxd/instance/drivers/driver/qemu: Adds physical NIC passthrough support Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/instance/drivers/driver_qemu.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index ef0f5dc99b..3495aed123 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -1457,7 +1457,7 @@ func (vm *qemu) addDriveConfig(sb *strings.Builder, bootIndexes map[string]int, // addNetDevConfig adds the qemu config required for adding a network device. 
func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes map[string]int, nicConfig []deviceConfig.RunConfigItem, fdFiles *[]string) error { - var devName, nicName, devHwaddr string + var devName, nicName, devHwaddr, pciSlotName string for _, nicItem := range nicConfig { if nicItem.Key == "devName" { devName = nicItem.Value @@ -1465,6 +1465,8 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m nicName = nicItem.Value } else if nicItem.Key == "hwaddr" { devHwaddr = nicItem.Value + } else if nicItem.Key == "pciSlotName" { + pciSlotName = nicItem.Value } } @@ -1499,6 +1501,10 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, nicIndex int, bootIndexes m // Detect TAP (via TUN driver) device. tplFields["ifName"] = nicName tpl = qemuNetDevTapTun + } else if pciSlotName != "" { + // Detect physical passthrough device. + tplFields["pciSlotName"] = pciSlotName + tpl = qemuNetdevPhysical } if tpl != nil { From 8065e5061685cfbe10386f968faa13e33aa3be1a Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 12:01:48 +0000 Subject: [PATCH 09/15] shared/instance: Updates config key checker to allow ".driver" keys Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- shared/instance.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shared/instance.go b/shared/instance.go index 1264e991c7..7830c0e806 100644 --- a/shared/instance.go +++ b/shared/instance.go @@ -418,6 +418,10 @@ func ConfigKeyChecker(key string) (func(value string) error, error) { if strings.HasSuffix(key, ".ceph_rbd") { return IsAny, nil } + + if strings.HasSuffix(key, ".driver") { + return IsAny, nil + } } if strings.HasPrefix(key, "environment.") { From 7efa6875cd943c7f1fcd4040611c926167d8a5de Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Mon, 27 Jan 2020 16:09:13 +0000 Subject: [PATCH 10/15] lxd/device/device/utils/network: Adds generic PCI device 
bind/unbind functions To be used with both physical VM NICs and sriov NICs. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/device_utils_network.go | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go index a37b69aaef..0d419a1588 100644 --- a/lxd/device/device_utils_network.go +++ b/lxd/device/device_utils_network.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" "sync" + "time" "github.com/pkg/errors" @@ -827,3 +828,53 @@ func networkGetDevicePCIDevice(ueventFilePath string) (pciDevice, error) { return dev, nil } + +// networkDeviceUnbind unbinds a network device from the OS using its PCI Slot Name and driver name. +func networkDeviceUnbind(pciDev pciDevice) error { + driverUnbindPath := fmt.Sprintf("/sys/bus/pci/drivers/%s/unbind", pciDev.Driver) + err := ioutil.WriteFile(driverUnbindPath, []byte(pciDev.SlotName), 0600) + if err != nil { + return errors.Wrapf(err, "Failed unbinding device %q via %q", pciDev.SlotName, driverUnbindPath) + } + + return nil +} + +// networkDeviceBind binds a network device to the OS using its PCI Slot Name and driver name. +func networkDeviceBind(pciDev pciDevice) error { + driverBindPath := fmt.Sprintf("/sys/bus/pci/drivers/%s/bind", pciDev.Driver) + err := ioutil.WriteFile(driverBindPath, []byte(pciDev.SlotName), 0600) + if err != nil { + return errors.Wrapf(err, "Failed binding device %q via %q", pciDev.SlotName, driverBindPath) + } + + return nil +} + +// networkDeviceBindWait waits for network device to appear after being bound to a driver.
+func networkDeviceBindWait(pciDev pciDevice) error { + devicePath := fmt.Sprintf("/sys/bus/pci/drivers/%s/%s", pciDev.Driver, pciDev.SlotName) + + for i := 0; i < 10; i++ { + if shared.PathExists(devicePath) { + return nil + } + + time.Sleep(50 * time.Millisecond) + } + + return fmt.Errorf("Bind of device %q took too long", devicePath) +} + +// networkInterfaceBindWait waits for network interface to appear after being bound to a driver. +func networkInterfaceBindWait(ifName string) error { + for i := 0; i < 10; i++ { + if shared.PathExists(fmt.Sprintf("/sys/class/net/%s", ifName)) { + return nil + } + + time.Sleep(50 * time.Millisecond) + } + + return fmt.Errorf("Bind of interface %q took too long", ifName) +} From 829ce0264eef50f267fcba31848ccdd0f596d2d1 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Mon, 27 Jan 2020 17:07:15 +0000 Subject: [PATCH 11/15] lxd/device/device/utils/network: Adds networkVFIOPCIRegister Allows a PCI device to be registered with the vfio-pci driver. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/device_utils_network.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go index 0d419a1588..289bb8efe3 100644 --- a/lxd/device/device_utils_network.go +++ b/lxd/device/device_utils_network.go @@ -878,3 +878,21 @@ func networkInterfaceBindWait(ifName string) error { return fmt.Errorf("Bind of interface %q took too long", ifName) } + +// networkVFIOPCIRegister registers the PCI device with the VFIO-PCI driver. +// Should also bind the device to the vfio-pci driver if it is present. Requires that the vfio-pci module is loaded. +func networkVFIOPCIRegister(pciDev pciDevice) error { + // vfio-pci module takes device IDs as "n n" but networkGetDevicePCIDevice returns them as "n:n".
+ devIDParts := strings.SplitN(pciDev.ID, ":", 2) + if len(devIDParts) < 2 { + return fmt.Errorf("Invalid device ID from %q", pciDev.ID) + } + + vfioPCINewIDPath := "/sys/bus/pci/drivers/vfio-pci/new_id" + err := ioutil.WriteFile(vfioPCINewIDPath, []byte(fmt.Sprintf("%s %s", devIDParts[0], devIDParts[1])), 0600) + if err != nil { + return errors.Wrapf(err, "Failed registering PCI device ID %q to %q", pciDev.ID, vfioPCINewIDPath) + } + + return nil +} From 3a610429a8f8d37f98162aabb0dd820844cac6db Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Mon, 27 Jan 2020 16:10:51 +0000 Subject: [PATCH 12/15] lxd/device/nic/sriov: Switches PCI device bind/unbind to generic functions Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_sriov.go | 70 ++++++++--------------------------------- 1 file changed, 13 insertions(+), 57 deletions(-) diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go index 03e2a63ccb..10a111ef6f 100644 --- a/lxd/device/nic_sriov.go +++ b/lxd/device/nic_sriov.go @@ -6,11 +6,9 @@ import ( "fmt" "io/ioutil" "os/exec" - "path/filepath" "regexp" "strconv" "strings" - "time" deviceConfig "github.com/lxc/lxd/lxd/device/config" "github.com/lxc/lxd/lxd/instance/instancetype" @@ -336,27 +334,20 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri } // Get VF device's PCI Slot Name so we can unbind and rebind it from the host. - vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) - if err != nil { - return err - } - - // Get the path to the VF device's driver now, as once it is unbound we won't be able to - // determine its driver path in order to rebind it. - vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"]) + vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) if err != nil { return err } // Unbind VF device from the host so that the settings will take effect when we rebind it. 
- err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath) + err = networkDeviceUnbind(vfPCIDev) if err != nil { return err } // However we return from this function, we must try to rebind the VF so its not orphaned. // The OS won't let an already bound device be bound again so is safe to call twice. - defer d.networkDeviceBind(vfPCISlot, vfDriverPath) + defer networkDeviceBind(vfPCIDev) // Setup VF VLAN if specified. if d.config["vlan"] != "" { @@ -402,7 +393,7 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri } // Bind VF device onto the host so that the settings will take effect. - err = d.networkDeviceBind(vfPCISlot, vfDriverPath) + err = networkDeviceBind(vfPCIDev) if err != nil { return err } @@ -411,7 +402,7 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri // it will re-appear shortly after. Unfortunately the time between sending the bind event // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait, // otherwise next steps of applying settings to interface will fail. - err = d.networkDeviceBindWait(volatile["host_name"]) + err = networkInterfaceBindWait(volatile["host_name"]) if err != nil { return err } @@ -481,42 +472,14 @@ func (d *nicSRIOV) networkGetVirtFuncInfo(devName string, vfID int) (vf virtFunc } // networkGetVFDevicePCISlot returns the PCI slot name for a network virtual function device. -func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (string, error) { +func (d *nicSRIOV) networkGetVFDevicePCISlot(vfID string) (pciDevice, error) { ueventFile := fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/uevent", d.config["parent"], vfID) pciDev, err := networkGetDevicePCIDevice(ueventFile) if err != nil { - return "", err + return pciDev, err } - return pciDev.SlotName, nil -} - -// networkGetVFDeviceDriverPath returns the path to the network virtual function device driver in /sys. 
-func (d *nicSRIOV) networkGetVFDeviceDriverPath(vfID string) (string, error) { - return filepath.EvalSymlinks(fmt.Sprintf("/sys/class/net/%s/device/virtfn%s/driver", d.config["parent"], vfID)) -} - -// networkDeviceUnbind unbinds a network device from the OS using its PCI Slot Name and driver path. -func (d *nicSRIOV) networkDeviceUnbind(pciSlotName string, driverPath string) error { - return ioutil.WriteFile(fmt.Sprintf("%s/unbind", driverPath), []byte(pciSlotName), 0600) -} - -// networkDeviceUnbind binds a network device to the OS using its PCI Slot Name and driver path. -func (d *nicSRIOV) networkDeviceBind(pciSlotName string, driverPath string) error { - return ioutil.WriteFile(fmt.Sprintf("%s/bind", driverPath), []byte(pciSlotName), 0600) -} - -// networkDeviceBindWait waits for network interface to appear after being binded. -func (d *nicSRIOV) networkDeviceBindWait(devName string) error { - for i := 0; i < 10; i++ { - if shared.PathExists(fmt.Sprintf("/sys/class/net/%s", devName)) { - return nil - } - - time.Sleep(50 * time.Millisecond) - } - - return fmt.Errorf("Bind of interface \"%s\" took too long", devName) + return pciDev, nil } // restoreSriovParent restores SR-IOV parent device settings when removed from an instance using the @@ -528,27 +491,20 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error { } // Get VF device's PCI Slot Name so we can unbind and rebind it from the host. - vfPCISlot, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) - if err != nil { - return err - } - - // Get the path to the VF device's driver now, as once it is unbound we won't be able to - // determine its driver path in order to rebind it. - vfDriverPath, err := d.networkGetVFDeviceDriverPath(volatile["last_state.vf.id"]) + vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) if err != nil { return err } // Unbind VF device from the host so that the settings will take effect when we rebind it. 
- err = d.networkDeviceUnbind(vfPCISlot, vfDriverPath) + err = networkDeviceUnbind(vfPCIDev) if err != nil { return err } // However we return from this function, we must try to rebind the VF so its not orphaned. // The OS won't let an already bound device be bound again so is safe to call twice. - defer d.networkDeviceBind(vfPCISlot, vfDriverPath) + defer networkDeviceBind(vfPCIDev) // Reset VF VLAN if specified if volatile["last_state.vf.vlan"] != "" { @@ -581,7 +537,7 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error { } // Bind VF device onto the host so that the settings will take effect. - err = d.networkDeviceBind(vfPCISlot, vfDriverPath) + err = networkDeviceBind(vfPCIDev) if err != nil { return err } @@ -590,7 +546,7 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error { // and it will re-appear on the host. Unfortunately the time between sending the bind event // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait, // otherwise next step of restoring MAC and MTU settings in restorePhysicalNic will fail. 
- err = d.networkDeviceBindWait(volatile["host_name"]) + err = networkInterfaceBindWait(volatile["host_name"]) if err != nil { return err } From 5e3807da38b747ec3c67e40f33f54e36020dbe6a Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 12:03:03 +0000 Subject: [PATCH 13/15] lxd/device/nic/physical: Adds VM PCI passthrough support Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_physical.go | 162 ++++++++++++++++++++++++++++--------- 1 file changed, 123 insertions(+), 39 deletions(-) diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go index e8470de7ef..25e36a7cc8 100644 --- a/lxd/device/nic_physical.go +++ b/lxd/device/nic_physical.go @@ -3,9 +3,12 @@ package device import ( "fmt" + "github.com/pkg/errors" + deviceConfig "github.com/lxc/lxd/lxd/device/config" "github.com/lxc/lxd/lxd/instance/instancetype" "github.com/lxc/lxd/lxd/revert" + "github.com/lxc/lxd/lxd/util" "github.com/lxc/lxd/shared" ) @@ -15,20 +18,22 @@ type nicPhysical struct { // validateConfig checks the supplied config for correctness. func (d *nicPhysical) validateConfig() error { - if d.inst.Type() != instancetype.Container { + if d.inst.Type() != instancetype.Container && d.inst.Type() != instancetype.VM { return ErrUnsupportedDevType } requiredFields := []string{"parent"} optionalFields := []string{ "name", - "mtu", - "hwaddr", - "vlan", "maas.subnet.ipv4", "maas.subnet.ipv6", "boot.priority", } + + if d.inst.Type() == instancetype.Container { + optionalFields = append(optionalFields, "mtu", "hwaddr", "vlan") + } + err := d.config.Validate(nicValidationRules(requiredFields, optionalFields)) if err != nil { return err @@ -66,45 +71,97 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { revert := revert.New() defer revert.Fail() + // pciSlotName, used for VM physical passthrough. + var pciSlotName string + + // If VM, then try and load the vfio-pci module first. 
+ if d.inst.Type() == instancetype.VM { + err = util.LoadModule("vfio-pci") + if err != nil { + return nil, errors.Wrapf(err, "Error loading %q module", "vfio-pci") + } + } + // Record the host_name device used for restoration later. saveData["host_name"] = NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"]) - if err != nil { - return nil, err - } - // Record whether we created this device or not so it can be removed on stop. - saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing") + if d.inst.Type() == instancetype.Container { + statusDev, err := NetworkCreateVlanDeviceIfNeeded(d.state, d.config["parent"], saveData["host_name"], d.config["vlan"]) + if err != nil { + return nil, err + } - if shared.IsTrue(saveData["last_state.created"]) { - revert.Add(func() { - NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"]) - }) - } + // Record whether we created this device or not so it can be removed on stop. + saveData["last_state.created"] = fmt.Sprintf("%t", statusDev != "existing") + + if shared.IsTrue(saveData["last_state.created"]) { + revert.Add(func() { + NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"]) + }) + } + + // If we didn't create the device we should track various properties so we can restore them when the + // instance is stopped or the device is detached. + if !shared.IsTrue(saveData["last_state.created"]) { + err = networkSnapshotPhysicalNic(saveData["host_name"], saveData) + if err != nil { + return nil, err + } + } + + // Set the MAC address. 
+ if d.config["hwaddr"] != "" { + _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"]) + if err != nil { + return nil, fmt.Errorf("Failed to set the MAC address: %s", err) + } + } + + // Set the MTU. + if d.config["mtu"] != "" { + _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"]) + if err != nil { + return nil, fmt.Errorf("Failed to set the MTU: %s", err) + } + } + } else if d.inst.Type() == instancetype.VM { + // Get PCI information about the network interface. + ueventPath := fmt.Sprintf("/sys/class/net/%s/device/uevent", saveData["host_name"]) + pciDev, err := networkGetDevicePCIDevice(ueventPath) + if err != nil { + return nil, errors.Wrapf(err, "Failed to get PCI device info for %q", saveData["host_name"]) + } + + saveData["last_state.pci.slot.name"] = pciDev.SlotName + saveData["last_state.pci.driver"] = pciDev.Driver - // If we didn't create the device we should track various properties so we can restore them when the - // instance is stopped or the device is detached. - if !shared.IsTrue(saveData["last_state.created"]) { - err = networkSnapshotPhysicalNic(saveData["host_name"], saveData) + // Unbind the interface from the host. + err = networkDeviceUnbind(pciDev) if err != nil { return nil, err } - } - // Set the MAC address. - if d.config["hwaddr"] != "" { - _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"]) + revert.Add(func() { networkDeviceBind(pciDev) }) + + // Register the device with the vfio-pci module. + err = networkVFIOPCIRegister(pciDev) if err != nil { - return nil, fmt.Errorf("Failed to set the MAC address: %s", err) + return nil, err } - } - // Set the MTU. 
- if d.config["mtu"] != "" { - _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"]) + vfioDev := pciDevice{ + Driver: "vfio-pci", + SlotName: pciDev.SlotName, + } + + revert.Add(func() { networkDeviceUnbind(vfioDev) }) + + err = networkDeviceBindWait(vfioDev) if err != nil { - return nil, fmt.Errorf("Failed to set the MTU: %s", err) + return nil, err } + + pciSlotName = saveData["last_state.pci.slot.name"] } err = d.volatileSet(saveData) @@ -124,6 +181,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { runConf.NetworkInterface = append(runConf.NetworkInterface, []deviceConfig.RunConfigItem{ {Key: "devName", Value: d.name}, + {Key: "pciSlotName", Value: pciSlotName}, }...) } @@ -147,26 +205,52 @@ func (d *nicPhysical) Stop() (*deviceConfig.RunConfig, error) { // postStop is run after the device is removed from the instance. func (d *nicPhysical) postStop() error { defer d.volatileSet(map[string]string{ - "host_name": "", - "last_state.hwaddr": "", - "last_state.mtu": "", - "last_state.created": "", + "host_name": "", + "last_state.hwaddr": "", + "last_state.mtu": "", + "last_state.created": "", + "last_state.pci.slot.name": "", + "last_state.pci.driver": "", }) v := d.volatileGet() - hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - // This will delete the parent interface if we created it for VLAN parent. - if shared.IsTrue(v["last_state.created"]) { - err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"]) + // If VM physical pass through, unbind from vfio-pci and bind back to host driver. 
+ if d.inst.Type() == instancetype.VM && v["last_state.pci.slot.name"] != "" { + vfioDev := pciDevice{ + Driver: "vfio-pci", + SlotName: v["last_state.pci.slot.name"], + } + + err := networkDeviceUnbind(vfioDev) if err != nil { return err } - } else { - err := networkRestorePhysicalNic(hostName, v) + + hostDev := pciDevice{ + Driver: v["last_state.pci.driver"], + SlotName: v["last_state.pci.slot.name"], + } + + err = networkDeviceBind(hostDev) if err != nil { return err } + } else if d.inst.Type() == instancetype.Container { + hostName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) + + // This will delete the parent interface if we created it for VLAN parent. + if shared.IsTrue(v["last_state.created"]) { + err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"]) + if err != nil { + return err + } + } else if v["last_state.pci.slot.name"] == "" { + err := networkRestorePhysicalNic(hostName, v) + if err != nil { + return err + } + } } return nil From ba93cc69ec37eab0b73ab7316dca2b14d563a894 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Mon, 27 Jan 2020 16:20:03 +0000 Subject: [PATCH 14/15] lxd/device: Unexports NetworkRemoveInterfaceIfNeeded Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/device_utils_network.go | 4 ++-- lxd/device/nic_ipvlan.go | 2 +- lxd/device/nic_macvlan.go | 4 ++-- lxd/device/nic_physical.go | 4 ++-- lxd/device/nic_routed.go | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lxd/device/device_utils_network.go b/lxd/device/device_utils_network.go index 289bb8efe3..9abd4babc1 100644 --- a/lxd/device/device_utils_network.go +++ b/lxd/device/device_utils_network.go @@ -139,8 +139,8 @@ func NetworkRemoveInterface(nic string) error { return err } -// NetworkRemoveInterfaceIfNeeded removes a network interface by name but only if no other instance is using it. 
-func NetworkRemoveInterfaceIfNeeded(state *state.State, nic string, current instance.Instance, parent string, vlanID string) error { +// networkRemoveInterfaceIfNeeded removes a network interface by name but only if no other instance is using it. +func networkRemoveInterfaceIfNeeded(state *state.State, nic string, current instance.Instance, parent string, vlanID string) error { // Check if it's used by another instance. instances, err := InstanceLoadNodeAll(state) if err != nil { diff --git a/lxd/device/nic_ipvlan.go b/lxd/device/nic_ipvlan.go index ee61a8353e..75c8227f8f 100644 --- a/lxd/device/nic_ipvlan.go +++ b/lxd/device/nic_ipvlan.go @@ -232,7 +232,7 @@ func (d *nicIPVLAN) postStop() error { // This will delete the parent interface if we created it for VLAN parent. if shared.IsTrue(v["last_state.created"]) { parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) + err := networkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) if err != nil { return err } diff --git a/lxd/device/nic_macvlan.go b/lxd/device/nic_macvlan.go index 65401ebcc9..03d34a9e47 100644 --- a/lxd/device/nic_macvlan.go +++ b/lxd/device/nic_macvlan.go @@ -83,7 +83,7 @@ func (d *nicMACVLAN) Start() (*deviceConfig.RunConfig, error) { if shared.IsTrue(saveData["last_state.created"]) { revert.Add(func() { - NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) + networkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) }) } @@ -188,7 +188,7 @@ func (d *nicMACVLAN) postStop() error { // This will delete the parent interface if we created it for VLAN parent. 
if shared.IsTrue(v["last_state.created"]) { actualParentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - err := NetworkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) + err := networkRemoveInterfaceIfNeeded(d.state, actualParentName, d.inst, d.config["parent"], d.config["vlan"]) if err != nil { errs = append(errs, err) } diff --git a/lxd/device/nic_physical.go b/lxd/device/nic_physical.go index 25e36a7cc8..b8d4662b72 100644 --- a/lxd/device/nic_physical.go +++ b/lxd/device/nic_physical.go @@ -96,7 +96,7 @@ func (d *nicPhysical) Start() (*deviceConfig.RunConfig, error) { if shared.IsTrue(saveData["last_state.created"]) { revert.Add(func() { - NetworkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"]) + networkRemoveInterfaceIfNeeded(d.state, saveData["host_name"], d.inst, d.config["parent"], d.config["vlan"]) }) } @@ -241,7 +241,7 @@ func (d *nicPhysical) postStop() error { // This will delete the parent interface if we created it for VLAN parent. if shared.IsTrue(v["last_state.created"]) { - err := NetworkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"]) + err := networkRemoveInterfaceIfNeeded(d.state, hostName, d.inst, d.config["parent"], d.config["vlan"]) if err != nil { return err } diff --git a/lxd/device/nic_routed.go b/lxd/device/nic_routed.go index 3f6d0cb66f..6077ce41bc 100644 --- a/lxd/device/nic_routed.go +++ b/lxd/device/nic_routed.go @@ -310,7 +310,7 @@ func (d *nicRouted) postStop() error { // This will delete the parent interface if we created it for VLAN parent. 
if shared.IsTrue(v["last_state.created"]) { parentName := NetworkGetHostDevice(d.config["parent"], d.config["vlan"]) - err := NetworkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) + err := networkRemoveInterfaceIfNeeded(d.state, parentName, d.inst, d.config["parent"], d.config["vlan"]) if err != nil { return err } From 47bd67bb1ff0a4207ac6abdb13505cca77a5aabb Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Mon, 27 Jan 2020 17:54:39 +0000 Subject: [PATCH 15/15] lxd/device/nic/sriov: Adds VM support Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/device/nic_sriov.go | 181 ++++++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 53 deletions(-) diff --git a/lxd/device/nic_sriov.go b/lxd/device/nic_sriov.go index 10a111ef6f..9269604381 100644 --- a/lxd/device/nic_sriov.go +++ b/lxd/device/nic_sriov.go @@ -12,6 +12,7 @@ import ( deviceConfig "github.com/lxc/lxd/lxd/device/config" "github.com/lxc/lxd/lxd/instance/instancetype" + "github.com/lxc/lxd/lxd/revert" "github.com/lxc/lxd/shared" ) @@ -21,14 +22,13 @@ type nicSRIOV struct { // validateConfig checks the supplied config for correctness. func (d *nicSRIOV) validateConfig() error { - if d.inst.Type() != instancetype.Container { + if d.inst.Type() != instancetype.Container && d.inst.Type() != instancetype.VM { return ErrUnsupportedDevType } requiredFields := []string{"parent"} optionalFields := []string{ "name", - "mtu", "hwaddr", "vlan", "security.mac_filtering", @@ -36,6 +36,12 @@ func (d *nicSRIOV) validateConfig() error { "maas.subnet.ipv6", "boot.priority", } + + // For VMs only NIC properties that can be specified on the parent's VF settings are controllable. 
+ if d.inst.Type() == instancetype.Container { + optionalFields = append(optionalFields, "mtu") + } + err := d.config.Validate(nicValidationRules(requiredFields, optionalFields)) if err != nil { return err @@ -76,31 +82,33 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) { return nil, err } - err = d.setupSriovParent(vfDev, vfID, saveData) + vfPCIDev, err := d.setupSriovParent(vfDev, vfID, saveData) if err != nil { return nil, err } - // Set the MAC address. - if d.config["hwaddr"] != "" { - _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"]) - if err != nil { - return nil, fmt.Errorf("Failed to set the MAC address: %s", err) + if d.inst.Type() == instancetype.Container { + // Set the MAC address. + if d.config["hwaddr"] != "" { + _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "address", d.config["hwaddr"]) + if err != nil { + return nil, fmt.Errorf("Failed to set the MAC address: %s", err) + } } - } - // Set the MTU. - if d.config["mtu"] != "" { - _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"]) - if err != nil { - return nil, fmt.Errorf("Failed to set the MTU: %s", err) + // Set the MTU. + if d.config["mtu"] != "" { + _, err := shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "mtu", d.config["mtu"]) + if err != nil { + return nil, fmt.Errorf("Failed to set the MTU: %s", err) + } } - } - // Bring the interface up. - _, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "up") - if err != nil { - return nil, fmt.Errorf("Failed to bring up the interface: %v", err) + // Bring the interface up. 
+ _, err = shared.RunCommand("ip", "link", "set", "dev", saveData["host_name"], "up") + if err != nil { + return nil, fmt.Errorf("Failed to bring up the interface: %v", err) + } } err = d.volatileSet(saveData) @@ -116,6 +124,14 @@ func (d *nicSRIOV) Start() (*deviceConfig.RunConfig, error) { {Key: "link", Value: saveData["host_name"]}, } + if d.inst.Type() == instancetype.VM { + runConf.NetworkInterface = append(runConf.NetworkInterface, + []deviceConfig.RunConfigItem{ + {Key: "devName", Value: d.name}, + {Key: "pciSlotName", Value: vfPCIDev.SlotName}, + }...) + } + return &runConf, nil } @@ -143,6 +159,7 @@ func (d *nicSRIOV) postStop() error { "last_state.vf.hwaddr": "", "last_state.vf.vlan": "", "last_state.vf.spoofcheck": "", + "last_state.pci.driver": "", }) v := d.volatileGet() @@ -308,15 +325,20 @@ func (d *nicSRIOV) getFreeVFInterface(reservedDevices map[string]struct{}, vfLis return "", nil } -// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original -// properties of the physical device into voltatile for restoration on detach. -func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) error { +// setupSriovParent configures a SR-IOV virtual function (VF) device on parent and stores original properties of +// the physical device into volatile for restoration on detach. Returns VF PCI device info. +func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[string]string) (pciDevice, error) { + var vfPCIDev pciDevice + // Retrieve VF settings from parent device. vfInfo, err := d.networkGetVirtFuncInfo(d.config["parent"], vfID) if err != nil { - return err + return vfPCIDev, err } + revert := revert.New() + defer revert.Fail() + // Record properties of VF settings on the parent device. 
volatile["last_state.vf.hwaddr"] = vfInfo.mac volatile["last_state.vf.id"] = fmt.Sprintf("%d", vfID) @@ -330,30 +352,28 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri // Record properties of VF device. err = networkSnapshotPhysicalNic(volatile["host_name"], volatile) if err != nil { - return err + return vfPCIDev, err } // Get VF device's PCI Slot Name so we can unbind and rebind it from the host. - vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) + vfPCIDev, err = d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) if err != nil { - return err + return vfPCIDev, err } // Unbind VF device from the host so that the settings will take effect when we rebind it. err = networkDeviceUnbind(vfPCIDev) if err != nil { - return err + return vfPCIDev, err } - // However we return from this function, we must try to rebind the VF so its not orphaned. - // The OS won't let an already bound device be bound again so is safe to call twice. - defer networkDeviceBind(vfPCIDev) + revert.Add(func() { networkDeviceBind(vfPCIDev) }) // Setup VF VLAN if specified. if d.config["vlan"] != "" { _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "vlan", d.config["vlan"]) if err != nil { - return err + return vfPCIDev, err } } @@ -370,44 +390,82 @@ func (d *nicSRIOV) setupSriovParent(vfDevice string, vfID int, volatile map[stri // Set MAC on VF (this combined with spoof checking prevents any other MAC being used). _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", mac) if err != nil { - return err + return vfPCIDev, err } // Now that MAC is set on VF, we can enable spoof checking. 
_, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "on") if err != nil { - return err + return vfPCIDev, err } } else { // Reset VF to ensure no previous MAC restriction exists. _, err := shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", "00:00:00:00:00:00") if err != nil { - return err + return vfPCIDev, err } // Ensure spoof checking is disabled if not enabled in instance. _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "spoofchk", "off") if err != nil { - return err + return vfPCIDev, err } - } - // Bind VF device onto the host so that the settings will take effect. - err = networkDeviceBind(vfPCIDev) - if err != nil { - return err + // Set MAC on VF if specified (this should be passed through into VM when it is bound to vfio-pci). + if d.inst.Type() == instancetype.VM { + // If no MAC specified in config, use current VF interface MAC. + mac := d.config["hwaddr"] + if mac == "" { + mac = volatile["last_state.hwaddr"] + } + + _, err = shared.RunCommand("ip", "link", "set", "dev", d.config["parent"], "vf", volatile["last_state.vf.id"], "mac", mac) + if err != nil { + return vfPCIDev, err + } + } } - // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and - // it will re-appear shortly after. Unfortunately the time between sending the bind event - // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait, - // otherwise next steps of applying settings to interface will fail. - err = networkInterfaceBindWait(volatile["host_name"]) - if err != nil { - return err + if d.inst.Type() == instancetype.Container { + // Bind VF device onto the host so that the settings will take effect. 
+ err = networkDeviceBind(vfPCIDev) + if err != nil { + return vfPCIDev, err + } + + // Wait for VF driver to be reloaded, this will remove the VF interface temporarily, and + // it will re-appear shortly after. Unfortunately the time between sending the bind event + // to the nic and it actually appearing on the host is non-zero, so we need to watch and wait, + // otherwise next steps of applying settings to interface will fail. + err = networkInterfaceBindWait(volatile["host_name"]) + if err != nil { + return vfPCIDev, err + } + } else if d.inst.Type() == instancetype.VM { + // Register VF device with vfio-pci driver so it can be passed to VM. + err = networkVFIOPCIRegister(vfPCIDev) + if err != nil { + return vfPCIDev, err + } + + vfioDev := pciDevice{ + Driver: "vfio-pci", + SlotName: vfPCIDev.SlotName, + } + + revert.Add(func() { networkDeviceUnbind(vfioDev) }) + + err = networkDeviceBindWait(vfioDev) + if err != nil { + return vfPCIDev, err + } + + // Record original driver used by VF device for restore. + volatile["last_state.pci.driver"] = vfPCIDev.Driver } - return nil + revert.Success() + return vfPCIDev, nil } // virtFuncInfo holds information about SR-IOV virtual functions. @@ -490,16 +548,33 @@ func (d *nicSRIOV) restoreSriovParent(volatile map[string]string) error { return nil } - // Get VF device's PCI Slot Name so we can unbind and rebind it from the host. + // Get VF device's PCI info so we can unbind and rebind it from the host. vfPCIDev, err := d.networkGetVFDevicePCISlot(volatile["last_state.vf.id"]) if err != nil { return err } - // Unbind VF device from the host so that the settings will take effect when we rebind it. - err = networkDeviceUnbind(vfPCIDev) - if err != nil { - return err + if d.inst.Type() == instancetype.Container { + // Unbind VF device from the host so that the settings will take effect when we rebind it. 
+ err = networkDeviceUnbind(vfPCIDev) + if err != nil { + return err + } + } else if d.inst.Type() == instancetype.VM { + // Unbind VF device from vfio-pci driver so that we can rebind it on host. + vfioDev := pciDevice{ + Driver: "vfio-pci", + SlotName: vfPCIDev.SlotName, + } + + err := networkDeviceUnbind(vfioDev) + if err != nil { + return err + } + + // Before we bind the device back to the host, ensure we restore the original driver info as it + // should be currently set to vfio-pci. + vfPCIDev.Driver = volatile["last_state.pci.driver"] } // However we return from this function, we must try to rebind the VF so its not orphaned.
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel