The following pull request was submitted through GitHub.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/3952

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
The `vfio` interface type supports SR-IOV enabled network devices. These
devices associate a set of virtual functions (VFs) with the single physical
function (PF) of the network device. PFs are standard PCIe functions. VFs on
the other hand are very lightweight PCIe functions that are optimized for data
movement. They come with a limited set of configuration capabilities to prevent
changing properties of the PF. Given that VFs appear as regular PCIe devices to
the system they can be passed to containers just like a regular physical
device. The `vfio` interface type expects to be passed the name of an SR-IOV
enabled network device on the system via the `parent` property. LXD will then
check for any available VFs on the system. By default LXD will allocate the
first free VF it finds. If it detects that either none are enabled or all
currently enabled VFs are in use it will bump the number of supported VFs to
the maximum value and use the first free VF. If all possible VFs are in use or
the kernel or card doesn't support incrementing the number of VFs LXD will
return an error. To create a `vfio` network device use:

```
lxc config device add <container> <device-name> nic nictype=vfio parent=<sriov-enabled-device>
```

To tell LXD to use a specific unused VF add the `host_name` property and pass
it the name of the enabled VF.

Signed-off-by: Christian Brauner <[email protected]>
From fcb83715682766d813d5dbd8b403d90511b3f1d7 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Tue, 17 Oct 2017 14:26:16 +0200
Subject: [PATCH 1/3] container: add nictype "vfio"

Closes #3941.

Signed-off-by: Christian Brauner <[email protected]>
---
 lxd/container.go      |   6 +--
 lxd/container_lxc.go  | 111 +++++++++++++++++++++++++++++++++++++++++++++++---
 lxd/networks_utils.go |   2 +-
 3 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/lxd/container.go b/lxd/container.go
index d53d91e19..bfe0f2349 100644
--- a/lxd/container.go
+++ b/lxd/container.go
@@ -306,12 +306,12 @@ func containerValidDevices(dbObj *sql.DB, devices 
types.Devices, profile bool, e
                                return fmt.Errorf("Missing nic type")
                        }
 
-                       if !shared.StringInSlice(m["nictype"], 
[]string{"bridged", "physical", "p2p", "macvlan"}) {
+                       if !shared.StringInSlice(m["nictype"], 
[]string{"bridged", "macvlan", "p2p", "physical", "vfio"}) {
                                return fmt.Errorf("Bad nic type: %s", 
m["nictype"])
                        }
 
-                       if shared.StringInSlice(m["nictype"], 
[]string{"bridged", "physical", "macvlan"}) && m["parent"] == "" {
-                               return fmt.Errorf("Missing parent for %s type 
nic.", m["nictype"])
+                       if shared.StringInSlice(m["nictype"], 
[]string{"bridged", "macvlan", "physical", "vfio"}) && m["parent"] == "" {
+                               return fmt.Errorf("Missing parent for %s type 
nic", m["nictype"])
                        }
                } else if m["type"] == "disk" {
                        if !expanded && !shared.StringInSlice(m["path"], 
diskDevicePaths) {
diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go
index fc5ed8cb0..45a33066e 100644
--- a/lxd/container_lxc.go
+++ b/lxd/container_lxc.go
@@ -1350,7 +1350,7 @@ func (c *containerLXC) initLXC() error {
                                if err != nil {
                                        return err
                                }
-                       } else if m["nictype"] == "physical" {
+                       } else if m["nictype"] == "physical" || m["nictype"] == 
"vfio" {
                                err = lxcSetConfigItem(cc, 
fmt.Sprintf("%s.%d.type", networkKeyPrefix, networkidx), "phys")
                                if err != nil {
                                        return err
@@ -1377,6 +1377,11 @@ func (c *containerLXC) initLXC() error {
                                if err != nil {
                                        return err
                                }
+                       } else if m["nictype"] == "vfio" {
+                               err = lxcSetConfigItem(cc, 
fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), m["host_name"])
+                               if err != nil {
+                                       return err
+                               }
                        } else if shared.StringInSlice(m["nictype"], 
[]string{"macvlan", "physical"}) {
                                err = lxcSetConfigItem(cc, 
fmt.Sprintf("%s.%d.link", networkKeyPrefix, networkidx), 
networkGetHostDevice(m["parent"], m["vlan"]))
                                if err != nil {
@@ -1386,7 +1391,7 @@ func (c *containerLXC) initLXC() error {
 
                        // Host Virtual NIC name
                        vethName := ""
-                       if m["host_name"] != "" {
+                       if m["host_name"] != "" && m["nictype"] != "vfio" {
                                vethName = m["host_name"]
                        } else if shared.IsTrue(m["security.mac_filtering"]) {
                                // We need a known device name for MAC filtering
@@ -5885,6 +5890,10 @@ func (c *containerLXC) createNetworkDevice(name string, 
m types.Device) (string,
                }
        }
 
+       if m["nictype"] == "vfio" {
+               dev = m["host_name"]
+       }
+
        // Handle bridged and p2p
        if shared.StringInSlice(m["nictype"], []string{"bridged", "p2p"}) {
                n2 := deviceNextVeth()
@@ -5914,7 +5923,7 @@ func (c *containerLXC) createNetworkDevice(name string, m 
types.Device) (string,
        }
 
        // Handle physical and macvlan
-       if shared.StringInSlice(m["nictype"], []string{"physical", "macvlan"}) {
+       if shared.StringInSlice(m["nictype"], []string{"macvlan", "physical"}) {
                // Deal with VLAN
                device := m["parent"]
                if m["vlan"] != "" {
@@ -6118,11 +6127,99 @@ func (c *containerLXC) fillNetworkDevice(name string, m 
types.Device) (types.Dev
        }
 
        // Fill in the host name (but don't generate a static one ourselves)
-       if m["host_name"] == "" && shared.StringInSlice(m["nictype"], 
[]string{"bridged", "p2p"}) {
-               configKey := fmt.Sprintf("volatile.%s.host_name", name)
+       configKey := fmt.Sprintf("volatile.%s.host_name", name)
+       if m["host_name"] == "" && shared.StringInSlice(m["nictype"], 
[]string{"bridged", "p2p", "vfio"}) {
                newDevice["host_name"] = c.localConfig[configKey]
        }
 
+       if m["nictype"] == "vfio" && m["parent"] != "" {
+               if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", 
m["parent"])) {
+                       return nil, fmt.Errorf("Parent device '%s' doesn't 
exist", m["parent"])
+               }
+
+               if newDevice["host_name"] == "" {
+                       sriovNumVFs := 
fmt.Sprintf("/sys/class/net/%s/device/sriov_numvfs", m["parent"])
+                       sriovTotalVFs := 
fmt.Sprintf("/sys/class/net/%s/device/sriov_totalvfs", m["parent"])
+
+                       // verify that this is indeed a SR-IOV enabled device
+                       if !shared.PathExists(sriovTotalVFs) {
+                               return nil, fmt.Errorf("Parent device '%s' 
doesn't support SR-IOV", m["parent"])
+                       }
+
+                       // get number of currently enabled VFs
+                       sriovNumVfsBuf, err := ioutil.ReadFile(sriovNumVFs)
+                       if err != nil {
+                               return nil, err
+                       }
+                       sriovNumVfsStr := 
strings.TrimSpace(string(sriovNumVfsBuf))
+                       sriovNum, err := strconv.Atoi(sriovNumVfsStr)
+                       if err != nil {
+                               return nil, err
+                       }
+
+                       // get number of possible VFs
+                       sriovTotalVfsBuf, err := ioutil.ReadFile(sriovTotalVFs)
+                       if err != nil {
+                               return nil, err
+                       }
+                       sriovTotalVfsStr := 
strings.TrimSpace(string(sriovTotalVfsBuf))
+                       sriovTotal, err := strconv.Atoi(sriovTotalVfsStr)
+                       if err != nil {
+                               return nil, err
+                       }
+
+                       // Check if any VFs are already enabled
+                       vf := ""
+                       for i := 0; i < sriovNum; i++ {
+                               vf = fmt.Sprintf("virtfn%d", i)
+                               if 
!shared.PathExists(fmt.Sprintf("/sys/class/net/%s/device/%s/net", m["parent"], 
vf)) {
+                                       vf = ""
+                                       continue
+                               }
+
+                               // Check if VF is already in use
+                               empty, err := 
shared.PathIsEmpty(fmt.Sprintf("/sys/class/net/%s/device/%s/net", m["parent"], 
vf))
+                               if err != nil {
+                                       return nil, err
+                               }
+                               if empty {
+                                       vf = ""
+                                       continue
+                               }
+
+                               // found free VF
+                               break
+                       }
+
+                       if vf == "" {
+                               if sriovNum == sriovTotal {
+                                       return nil, fmt.Errorf("All virtual 
functions of vfio device '%s' seem to be in use", m["parent"])
+                               }
+
+                               // bump the number of VFs to the maximum
+                               err := ioutil.WriteFile(sriovNumVFs, 
[]byte(sriovTotalVfsStr), 0644)
+                               if err != nil {
+                                       return nil, err
+                               }
+
+                               // use next free VF index
+                               vf = fmt.Sprintf("virtfn%d", sriovNum+1)
+                       }
+
+                       vf = fmt.Sprintf("/sys/class/net/%s/device/%s/net", 
m["parent"], vf)
+                       ents, err := ioutil.ReadDir(vf)
+                       if err != nil {
+                               return nil, err
+                       }
+                       if len(ents) == 0 || len(ents) > 1 {
+                               return nil, fmt.Errorf("Failed to determine 
unique device name")
+                       }
+
+                       newDevice["host_name"] = ents[0].Name()
+                       c.localConfig[configKey] = ents[0].Name()
+               }
+       }
+
        return newDevice, nil
 }
 
@@ -6249,6 +6346,8 @@ func (c *containerLXC) removeNetworkDevice(name string, m 
types.Device) error {
        var hostName string
        if m["nictype"] == "physical" {
                hostName = m["parent"]
+       } else if m["nictype"] == "vfio" {
+               hostName = m["host_name"]
        } else {
                hostName = deviceNextVeth()
        }
@@ -6266,7 +6365,7 @@ func (c *containerLXC) removeNetworkDevice(name string, m 
types.Device) error {
        }
 
        // If a veth, destroy it
-       if m["nictype"] != "physical" {
+       if m["nictype"] != "physical" && m["nictype"] != "vfio" {
                deviceRemoveInterface(hostName)
        }
 
diff --git a/lxd/networks_utils.go b/lxd/networks_utils.go
index 4cf455a84..95656ba53 100644
--- a/lxd/networks_utils.go
+++ b/lxd/networks_utils.go
@@ -104,7 +104,7 @@ func networkIsInUse(c container, name string) bool {
                        continue
                }
 
-               if !shared.StringInSlice(d["nictype"], []string{"bridged", 
"macvlan", "physical"}) {
+               if !shared.StringInSlice(d["nictype"], []string{"bridged", 
"macvlan", "physical", "vfio"}) {
                        continue
                }
 

From 817fc4cacb2acb4513cb48c41ad7330147226490 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Wed, 18 Oct 2017 11:33:35 +0200
Subject: [PATCH 2/3] doc/containers: add nictype=vfio

Closes #3941.

Signed-off-by: Christian Brauner <[email protected]>
---
 doc/containers.md | 58 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/doc/containers.md b/doc/containers.md
index c5a72bea9..c8a0f8f14 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -62,7 +62,7 @@ Key                             | Type      | Default       | 
Description
 :--                             | :---      | :------       | :----------
 volatile.\<name\>.hwaddr        | string    | -             | Network device 
MAC address (when no hwaddr property is set on the device itself)
 volatile.\<name\>.name          | string    | -             | Network device 
name (when no name propery is set on the device itself)
-volatile.\<name\>.host\_name    | string    | -             | Network device 
name on the host (for nictype=bridged or nictype=p2p)
+volatile.\<name\>.host\_name    | string    | -             | Network device 
name on the host (for nictype=bridged, nictype=p2p, or nictype=vfio)
 volatile.apply\_quota           | string    | -             | Disk quota to be 
applied on next container start
 volatile.apply\_template        | string    | -             | The name of a 
template hook which should be triggered upon next startup
 volatile.base\_image            | string    | -             | The hash of the 
image the container was created from, if any.
@@ -170,24 +170,25 @@ LXD supports different kind of network devices:
  - `bridged`: Uses an existing bridge on the host and creates a virtual device 
pair to connect the host bridge to the container.
  - `macvlan`: Sets up a new network device based on an existing one but using 
a different MAC address.
  - `p2p`: Creates a virtual device pair, putting one side in the container and 
leaving the other side on the host.
+ - `vfio`: Passes a virtual function of an SR-IOV enabled physical network 
device into the container.
 
 Different network interface types have different additional properties, the 
current list is:
 
-Key                     | Type      | Default           | Required  | Used by  
                     | API extension                          | Description
-:--                     | :--       | :--               | :--       | :--      
                     | :--                                    | :--
-nictype                 | string    | -                 | yes       | all      
                     | -                                      | The device 
type, one of "physical", "bridged", "macvlan" or "p2p"
-limits.ingress          | string    | -                 | no        | bridged, 
p2p                  | -                                      | I/O limit in 
bit/s (supports kbit, Mbit, Gbit suffixes)
-limits.egress           | string    | -                 | no        | bridged, 
p2p                  | -                                      | I/O limit in 
bit/s (supports kbit, Mbit, Gbit suffixes)
-limits.max              | string    | -                 | no        | bridged, 
p2p                  | -                                      | Same as 
modifying both limits.read and limits.write
-name                    | string    | kernel assigned   | no        | all      
                     | -                                      | The name of the 
interface inside the container
-host\_name              | string    | randomly assigned | no        | bridged, 
p2p, macvlan         | -                                      | The name of the 
interface inside the host
-hwaddr                  | string    | randomly assigned | no        | all      
                     | -                                      | The MAC address 
of the new interface
-mtu                     | integer   | parent MTU        | no        | all      
                     | -                                      | The MTU of the 
new interface
-parent                  | string    | -                 | yes       | 
physical, bridged, macvlan    | -                                      | The 
name of the host device or bridge
-vlan                    | integer   | -                 | no        | macvlan, 
physical             | network\_vlan, network\_vlan\_physical | The VLAN ID to 
attach to
-ipv4.address            | string    | -                 | no        | bridged  
                     | network                                | An IPv4 address 
to assign to the container through DHCP
-ipv6.address            | string    | -                 | no        | bridged  
                     | network                                | An IPv6 address 
to assign to the container through DHCP
-security.mac\_filtering | boolean   | false             | no        | bridged  
                     | network                                | Prevent the 
container from spoofing another's MAC address
+Key                     | Type      | Default           | Required  | Used by  
                        | API extension                          | Description
+:--                     | :--       | :--               | :--       | :--      
                        | :--                                    | :--
+nictype                 | string    | -                 | yes       | all      
                        | -                                      | The device 
type, one of "bridged", "macvlan", "p2p", "physical", or "vfio"
+limits.ingress          | string    | -                 | no        | bridged, 
p2p                     | -                                      | I/O limit in 
bit/s (supports kbit, Mbit, Gbit suffixes)
+limits.egress           | string    | -                 | no        | bridged, 
p2p                     | -                                      | I/O limit in 
bit/s (supports kbit, Mbit, Gbit suffixes)
+limits.max              | string    | -                 | no        | bridged, 
p2p                     | -                                      | Same as 
modifying both limits.ingress and limits.egress
+name                    | string    | kernel assigned   | no        | all      
                        | -                                      | The name of 
the interface inside the container
+host\_name              | string    | randomly assigned | no        | bridged, 
macvlan, p2p, vfio      | -                                      | The name of 
the interface inside the host
+hwaddr                  | string    | randomly assigned | no        | all      
                        | -                                      | The MAC 
address of the new interface
+mtu                     | integer   | parent MTU        | no        | all      
                        | -                                      | The MTU of 
the new interface
+parent                  | string    | -                 | yes       | bridged, 
macvlan, physical, vfio | -                                      | The name of 
the host device or bridge
+vlan                    | integer   | -                 | no        | macvlan, 
physical                | network\_vlan, network\_vlan\_physical | The VLAN ID 
to attach to
+ipv4.address            | string    | -                 | no        | bridged  
                        | network                                | An IPv4 
address to assign to the container through DHCP
+ipv6.address            | string    | -                 | no        | bridged  
                        | network                                | An IPv6 
address to assign to the container through DHCP
+security.mac\_filtering | boolean   | false             | no        | bridged  
                        | network                                | Prevent the 
container from spoofing another's MAC address
 
 #### bridged or macvlan for connection to physical network
 The `bridged` and `macvlan` interface types can both be used to connect
@@ -206,6 +207,31 @@ your containers to talk to the host itself.
 In such case, a bridge is preferable. A bridge will also let you use mac
 filtering and I/O limits which cannot be applied to a macvlan device.
 
+#### vfio
+The `vfio` interface type supports SR-IOV enabled network devices. These
+devices associate a set of virtual functions (VFs) with the single physical
+function (PF) of the network device. PFs are standard PCIe functions. VFs on
+the other hand are very lightweight PCIe functions that are optimized for data
+movement. They come with a limited set of configuration capabilities to prevent
+changing properties of the PF. Given that VFs appear as regular PCIe devices to
+the system they can be passed to containers just like a regular physical
+device. The `vfio` interface type expects to be passed the name of an SR-IOV
+enabled network device on the system via the `parent` property. LXD will then
+check for any available VFs on the system. By default LXD will allocate the
+first free VF it finds. If it detects that either none are enabled or all
+currently enabled VFs are in use it will bump the number of supported VFs to
+the maximum value and use the first free VF. If all possible VFs are in use or
+the kernel or card doesn't support incrementing the number of VFs LXD will
+return an error. To create a `vfio` network device use:
+
+```
+lxc config device add <container> <device-name> nic nictype=vfio 
parent=<sriov-enabled-device>
+```
+
+To tell LXD to use a specific unused VF add the `host_name` property and pass
+it the name of the enabled VF.
+
+
 ### Type: disk
 Disk entries are essentially mountpoints inside the container. They can
 either be a bind-mount of an existing file or directory on the host, or

From d0aea5b2360c8ee551c352fb4610cf153027884d Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Wed, 18 Oct 2017 12:07:52 +0200
Subject: [PATCH 3/3] api extension: add "network_vfio" extension

Closes #3941.

This adds support for SR-IOV enabled network devices.

Signed-off-by: Christian Brauner <[email protected]>
---
 doc/api-extensions.md | 3 +++
 lxd/api_1.0.go        | 1 +
 2 files changed, 4 insertions(+)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 04695ed6d..cbb607418 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -346,3 +346,6 @@ This adds support for querying an LXD daemon for the system 
resources it has
 ## kernel\_limits
 This adds support for setting process limits such as maximum number of open
 files for the container via `nofile`. The format is `limits.kernel.[limit 
name]`.
+
+## network\_vfio
+This adds support for SR-IOV enabled network devices.
diff --git a/lxd/api_1.0.go b/lxd/api_1.0.go
index d05dc407d..4ac0634ce 100644
--- a/lxd/api_1.0.go
+++ b/lxd/api_1.0.go
@@ -130,6 +130,7 @@ func api10Get(d *Daemon, r *http.Request) Response {
                        "storage_block_filesystem_btrfs",
                        "resources",
                        "kernel_limits",
+                       "network_vfio",
                },
                APIStatus:  "stable",
                APIVersion: version.APIVersion,
_______________________________________________
lxc-devel mailing list
[email protected]
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to