The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/7524

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
Tested with the following devices:

For each GPU tested, I checked that `lspci` shows a VGA compatible device in the VM, that `lspci -k` shows it using the relevant graphics driver, and that it appears in `/sys/class/drm`.

Additionally, for the NVIDIA GPU, I installed `nvidia-smi` and checked that it could run and was able to extract info, such as temperature and product name.

The NVIDIA card required starting the VM with these options in order for nvidia-smi to work.

```
raw.qemu: -cpu host,kvm=off,hv_vendor_id=null -machine type=q35,kernel_irqchip=on
```

Onboard Intel i915 graphics:
```
00:02.0 VGA compatible controller: Intel Corporation Xeon E3-1200 v3/4th Gen Core Processor Integrated Graphics Controller (rev 06)
```

Discrete NVIDIA GTX 770 (and associated sound device in an IOMMU group):
```
01:00.0 VGA compatible controller: NVIDIA Corporation GK104 [GeForce GTX 770] (rev a1)
01:00.1 Audio device: NVIDIA Corporation GK104 HDMI Audio Controller (rev a1)
```

In order to pass through the NVIDIA device I needed to ensure the card wasn't active on the host (i.e. no proprietary driver loaded), as dynamically rebinding to vfio-pci causes a kernel panic if the card is already active.

The Intel GPU doesn't have the same issue.
From d74e2b3510496399a4db64efbcdd1bbc3c0eea00 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Mon, 8 Jun 2020 15:55:53 +0100
Subject: [PATCH 1/9] doc/instances: Updates GPU device docs to show VM support

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 doc/instances.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/instances.md b/doc/instances.md
index 8e8f80ff36..6663111ee9 100644
--- a/doc/instances.md
+++ b/doc/instances.md
@@ -692,7 +692,7 @@ required    | boolean   | false             | no        | 
Whether or not this de
 
 ### Type: gpu
 
-Supported instance types: container
+Supported instance types: container, VM
 
 GPU device entries simply make the requested gpu device appear in the
 instance.
@@ -705,9 +705,9 @@ vendorid    | string    | -                 | no        | 
The vendor id of the G
 productid   | string    | -                 | no        | The product id of 
the GPU device
 id          | string    | -                 | no        | The card id of the 
GPU device
 pci         | string    | -                 | no        | The pci address of 
the GPU device
-uid         | int       | 0                 | no        | UID of the device 
owner in the instance
-gid         | int       | 0                 | no        | GID of the device 
owner in the instance
-mode        | int       | 0660              | no        | Mode of the device 
in the instance
+uid         | int       | 0                 | no        | UID of the device 
owner in the instance (container only)
+gid         | int       | 0                 | no        | GID of the device 
owner in the instance (container only)
+mode        | int       | 0660              | no        | Mode of the device 
in the instance (container only)
 
 ### Type: proxy
 

From 189b4752842ad3f56e4125f6ee939d64599156ea Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Mon, 8 Jun 2020 16:09:32 +0100
Subject: [PATCH 2/9] lxd/device/gpu: Updates validation for VM support

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/device/gpu.go | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/lxd/device/gpu.go b/lxd/device/gpu.go
index 3c111b9cac..b09a6a51f7 100644
--- a/lxd/device/gpu.go
+++ b/lxd/device/gpu.go
@@ -33,7 +33,7 @@ type gpu struct {
 
 // validateConfig checks the supplied config for correctness.
 func (d *gpu) validateConfig(instConf instance.ConfigReader) error {
-       if !instanceSupported(instConf.Type(), instancetype.Container) {
+       if !instanceSupported(instConf.Type(), instancetype.Container, 
instancetype.VM) {
                return ErrUnsupportedDevType
        }
 
@@ -52,12 +52,28 @@ func (d *gpu) validateConfig(instConf 
instance.ConfigReader) error {
                return err
        }
 
-       if d.config["pci"] != "" && (d.config["id"] != "" || 
d.config["productid"] != "" || d.config["vendorid"] != "") {
-               return fmt.Errorf("Cannot use id, productid or vendorid when 
pci is set")
+       if d.config["pci"] != "" {
+               for _, field := range []string{"id", "productid", "vendorid"} {
+                       if d.config[field] != "" {
+                               return fmt.Errorf(`Cannot use %q when when 
"pci" is set`)
+                       }
+               }
+       }
+
+       if d.config["id"] != "" {
+               for _, field := range []string{"pci", "productid", "vendorid"} {
+                       if d.config[field] != "" {
+                               return fmt.Errorf(`Cannot use %q when when "id" 
is set`)
+                       }
+               }
        }
 
-       if d.config["id"] != "" && (d.config["pci"] != "" || 
d.config["productid"] != "" || d.config["vendorid"] != "") {
-               return fmt.Errorf("Cannot use pci, productid or vendorid when 
id is set")
+       if instConf.Type() == instancetype.VM {
+               for _, field := range []string{"uid", "gid", "mode"} {
+                       if d.config[field] != "" {
+                               return fmt.Errorf("Cannot use %q when instannce 
type is VM")
+                       }
+               }
        }
 
        return nil

From cc6f5c855ca82b2357f8b2ea37541951ad9d4e6c Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Thu, 11 Jun 2020 13:29:27 +0100
Subject: [PATCH 3/9] lxd/device/config/device/runconfig: Adds GPU field to
 RunConfig

For passing through GPU device config settings to Qemu instances.

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/device/config/device_runconfig.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lxd/device/config/device_runconfig.go 
b/lxd/device/config/device_runconfig.go
index 35b24e3cbf..eba388047a 100644
--- a/lxd/device/config/device_runconfig.go
+++ b/lxd/device/config/device_runconfig.go
@@ -41,4 +41,5 @@ type RunConfig struct {
        Mounts           []MountEntryItem // Mounts to setup/remove.
        Uevents          [][]string       // Uevents to inject.
        PostHooks        []func() error   // Functions to be run after device 
attach/detach.
+       GPUDevice        []RunConfigItem  // GPU device configuration settings.
 }

From b1f04d86324234fb8027dca8664727b1c6a1edd4 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 12 Jun 2020 09:47:04 +0100
Subject: [PATCH 4/9] lxd/device/device/utils/generic: pciDeviceDriverOverride
 only check for driver binding if specified

Allows for clearing driver override and rebinding when original driver is 
unknown (or device was not originally bound to a driver).

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/device/device_utils_generic.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lxd/device/device_utils_generic.go 
b/lxd/device/device_utils_generic.go
index a959973c6e..819353e0b6 100644
--- a/lxd/device/device_utils_generic.go
+++ b/lxd/device/device_utils_generic.go
@@ -150,10 +150,12 @@ func pciDeviceDriverOverride(pciDev pciDevice, 
driverOverride string) error {
                SlotName: pciDev.SlotName,
        }
 
-       // Wait for the device to be bound to the overridden driver.
-       err = pciDeviceProbeWait(vfioDev)
-       if err != nil {
-               return err
+       // Wait for the device to be bound to the overridden driver if 
specified.
+       if vfioDev.Driver != "" {
+               err = pciDeviceProbeWait(vfioDev)
+               if err != nil {
+                       return err
+               }
        }
 
        revert.Success()

From 1fa434540d0739e610bfa4bbb6cdf2d8c086be04 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Thu, 11 Jun 2020 13:43:04 +0100
Subject: [PATCH 5/9] lxd/device/gpu: Adds VM GPU passthrough support

Unbinds specified device and associated IOMMU group VFs and rebinds them to 
vfio-pci.

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/device/gpu.go | 170 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 160 insertions(+), 10 deletions(-)

diff --git a/lxd/device/gpu.go b/lxd/device/gpu.go
index b09a6a51f7..200415a75d 100644
--- a/lxd/device/gpu.go
+++ b/lxd/device/gpu.go
@@ -9,6 +9,7 @@ import (
        "strconv"
        "strings"
 
+       "github.com/pkg/errors"
        "golang.org/x/sys/unix"
 
        deviceConfig "github.com/lxc/lxd/lxd/device/config"
@@ -95,6 +96,16 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
                return nil, err
        }
 
+       if d.inst.Type() == instancetype.VM {
+               return d.startVM()
+       }
+
+       return d.startContainer()
+}
+
+// startContainer detects the requested GPU devices and sets up unix-char 
devices.
+// Returns RunConfig populated with mount info required to pass the unix-char 
devices into the container.
+func (d *gpu) startContainer() (*deviceConfig.RunConfig, error) {
        runConf := deviceConfig.RunConfig{}
        gpus, err := resources.GetGPU()
        if err != nil {
@@ -103,14 +114,16 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
 
        sawNvidia := false
        found := false
+
        for _, gpu := range gpus.Cards {
+               // Skip any cards that don't match the vendorid, pci or 
productid settings (if specified).
                if (d.config["vendorid"] != "" && gpu.VendorID != 
d.config["vendorid"]) ||
                        (d.config["pci"] != "" && gpu.PCIAddress != 
d.config["pci"]) ||
                        (d.config["productid"] != "" && gpu.ProductID != 
d.config["productid"]) {
                        continue
                }
 
-               // Handle DRM devices if present and matches criteria.
+               // Setup DRM unix-char devices if present and matches id 
criteria (or if id not specified).
                if gpu.DRM != nil && (d.config["id"] == "" || fmt.Sprintf("%d", 
gpu.DRM.ID) == d.config["id"]) {
                        found = true
 
@@ -170,9 +183,9 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
                }
        }
 
+       // Setup additional unix-char devices for nvidia cards.
+       // No need to mount additional nvidia non-card devices as the 
nvidia.runtime setting will do this for us.
        if sawNvidia {
-               // No need to mount additional nvidia non-card devices as the 
nvidia.runtime
-               // setting will do this for us.
                instanceConfig := d.inst.ExpandedConfig()
                if !shared.IsTrue(instanceConfig["nvidia.runtime"]) {
                        nvidiaDevices, err := d.getNvidiaNonCardDevices()
@@ -201,15 +214,130 @@ func (d *gpu) Start() (*deviceConfig.RunConfig, error) {
        return &runConf, nil
 }
 
+// startVM detects the requested GPU devices and related virtual functions and 
rebinds them to the vfio-pci driver.
+func (d *gpu) startVM() (*deviceConfig.RunConfig, error) {
+       runConf := deviceConfig.RunConfig{}
+       gpus, err := resources.GetGPU()
+       if err != nil {
+               return nil, err
+       }
+
+       saveData := make(map[string]string)
+       var pciAddress string
+
+       for _, gpu := range gpus.Cards {
+               // Skip any cards that don't match the vendorid, pci, productid 
or DRM ID settings (if specified).
+               if (d.config["vendorid"] != "" && gpu.VendorID != 
d.config["vendorid"]) ||
+                       (d.config["pci"] != "" && gpu.PCIAddress != 
d.config["pci"]) ||
+                       (d.config["productid"] != "" && gpu.ProductID != 
d.config["productid"]) ||
+                       (d.config["id"] != "" && (gpu.DRM == nil || 
fmt.Sprintf("%d", gpu.DRM.ID) != d.config["id"])) {
+                       continue
+               }
+
+               if pciAddress != "" {
+                       return nil, fmt.Errorf("VMs cannot match multiple GPUs 
per device")
+               }
+
+               pciAddress = gpu.PCIAddress
+       }
+
+       if pciAddress == "" {
+               return nil, fmt.Errorf("Failed to detect requested GPU device")
+       }
+
+       // Get PCI information about the GPU device.
+       devicePath := filepath.Join("/sys/bus/pci/devices", pciAddress)
+       pciDev, err := pciParseUeventFile(filepath.Join(devicePath, "uevent"))
+       if err != nil {
+               return nil, errors.Wrapf(err, "Failed to get PCI device info 
for GPU %q", pciAddress)
+       }
+
+       saveData["last_state.pci.slot.name"] = pciDev.SlotName
+       saveData["last_state.pci.driver"] = pciDev.Driver
+
+       err = d.pciDeviceDriverOverrideIOMMU(pciDev, "vfio-pci", false)
+       if err != nil {
+               return nil, errors.Wrapf(err, "Failed to override IOMMU group 
driver")
+       }
+
+       runConf.GPUDevice = append(runConf.GPUDevice,
+               []deviceConfig.RunConfigItem{
+                       {Key: "devName", Value: d.name},
+                       {Key: "pciSlotName", Value: 
saveData["last_state.pci.slot.name"]},
+               }...)
+
+       err = d.volatileSet(saveData)
+       if err != nil {
+               return nil, err
+       }
+
+       return &runConf, nil
+}
+
+// pciDeviceDriverOverrideIOMMU overrides all functions in the specified 
device's IOMMU group (if exists) that
+// are functions of the device. If IOMMU group doesn't exist, only the device 
itself is overridden.
+// If the restore argument is true, then IOMMU VF devices related to the main device have their driver override
+// cleared rather than being set to the driverOverride specified. This ensures that IOMMU VFs that were using a
+// different driver (or no driver) when they were overridden are not restored back to the main device's driver.
+func (d *gpu) pciDeviceDriverOverrideIOMMU(pciDev pciDevice, driverOverride 
string, restore bool) error {
+       iommuGroupPath := filepath.Join("/sys/bus/pci/devices", 
pciDev.SlotName, "iommu_group", "devices")
+
+       if shared.PathExists(iommuGroupPath) {
+               // Extract parent slot name by removing any virtual function ID.
+               parts := strings.SplitN(pciDev.SlotName, ".", 2)
+               prefix := parts[0]
+
+               // Iterate the members of the IOMMU group and override any that 
match the parent slot name prefix.
+               err := filepath.Walk(iommuGroupPath, func(path string, _ 
os.FileInfo, err error) error {
+                       if err != nil {
+                               return err
+                       }
+
+                       iommuSlotName := filepath.Base(path) // Virtual 
function's address is dir name.
+                       if strings.HasPrefix(iommuSlotName, prefix) {
+                               iommuPciDev := pciDevice{
+                                       Driver:   pciDev.Driver,
+                                       SlotName: iommuSlotName,
+                               }
+
+                               if iommuSlotName != pciDev.SlotName && restore {
+                                       // We don't know the original driver 
for VFs, so just remove override.
+                                       err = 
pciDeviceDriverOverride(iommuPciDev, "")
+                               } else {
+                                       err = 
pciDeviceDriverOverride(iommuPciDev, driverOverride)
+                               }
+
+                               if err != nil {
+                                       return err
+                               }
+                       }
+
+                       return nil
+               })
+               if err != nil {
+                       return err
+               }
+       } else {
+               err := pciDeviceDriverOverride(pciDev, driverOverride)
+               if err != nil {
+                       return err
+               }
+       }
+
+       return nil
+}
+
 // Stop is run when the device is removed from the instance.
 func (d *gpu) Stop() (*deviceConfig.RunConfig, error) {
        runConf := deviceConfig.RunConfig{
                PostHooks: []func() error{d.postStop},
        }
 
-       err := unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, "", 
&runConf)
-       if err != nil {
-               return nil, err
+       if d.inst.Type() == instancetype.Container {
+               err := unixDeviceRemove(d.inst.DevicesPath(), "unix", d.name, 
"", &runConf)
+               if err != nil {
+                       return nil, err
+               }
        }
 
        return &runConf, nil
@@ -217,10 +345,32 @@ func (d *gpu) Stop() (*deviceConfig.RunConfig, error) {
 
 // postStop is run after the device is removed from the instance.
 func (d *gpu) postStop() error {
-       // Remove host files for this device.
-       err := unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), "unix", 
d.name, "")
-       if err != nil {
-               return fmt.Errorf("Failed to delete files for device '%s': %v", 
d.name, err)
+       defer d.volatileSet(map[string]string{
+               "last_state.pci.slot.name": "",
+               "last_state.pci.driver":    "",
+       })
+
+       v := d.volatileGet()
+
+       if d.inst.Type() == instancetype.Container {
+               // Remove host files for this device.
+               err := unixDeviceDeleteFiles(d.state, d.inst.DevicesPath(), 
"unix", d.name, "")
+               if err != nil {
+                       return fmt.Errorf("Failed to delete files for device 
'%s': %v", d.name, err)
+               }
+       }
+
+       // If VM physical pass through, unbind from vfio-pci and bind back to 
host driver.
+       if d.inst.Type() == instancetype.VM && v["last_state.pci.slot.name"] != 
"" {
+               pciDev := pciDevice{
+                       Driver:   "vfio-pci",
+                       SlotName: v["last_state.pci.slot.name"],
+               }
+
+               err := d.pciDeviceDriverOverrideIOMMU(pciDev, 
v["last_state.pci.driver"], true)
+               if err != nil {
+                       return err
+               }
        }
 
        return nil

From 33d9f1f180fbb9e26338cfd43045f390a4291815 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 12 Jun 2020 10:09:57 +0100
Subject: [PATCH 6/9] lxd/instance/drivers/driver/qemu/templates: Consistent
 naming and casing for net dev templates

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_qemu_templates.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu_templates.go 
b/lxd/instance/drivers/driver_qemu_templates.go
index f4a6c3f1ca..9f4c3ae179 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -357,8 +357,8 @@ multifunction = "on"
 {{- end }}
 `))
 
-// qemuDevTapCommon is common PCI device template for tap based netdevs.
-var qemuDevTapCommon = template.Must(template.New("qemuDevTapCommon").Parse(`
+// qemuNetDevTapCommon is common PCI device template for tap based netdevs.
+var qemuNetDevTapCommon = 
template.Must(template.New("qemuNetDevTapCommon").Parse(`
 [device "dev-lxd_{{.devName}}"]
 {{- if eq .bus "pci" "pcie"}}
 driver = "virtio-net-pci"
@@ -377,7 +377,7 @@ multifunction = "on"
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetDevTapTun = 
template.Must(qemuDevTapCommon.New("qemuNetDevTapTun").Parse(`
+var qemuNetDevTapTun = 
template.Must(qemuNetDevTapCommon.New("qemuNetDevTapTun").Parse(`
 # Network card ("{{.devName}}" device)
 [netdev "lxd_{{.devName}}"]
 type = "tap"
@@ -385,21 +385,21 @@ vhost = "on"
 ifname = "{{.ifName}}"
 script = "no"
 downscript = "no"
-{{ template "qemuDevTapCommon" . -}}
+{{ template "qemuNetDevTapCommon" . -}}
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetdevTapFD = 
template.Must(qemuDevTapCommon.New("qemuNetdevTapFD").Parse(`
+var qemuNetDevTapFD = 
template.Must(qemuNetDevTapCommon.New("qemuNetDevTapFD").Parse(`
 # Network card ("{{.devName}}" device)
 [netdev "lxd_{{.devName}}"]
 type = "tap"
 vhost = "on"
 fd = "{{.tapFD}}"
-{{ template "qemuDevTapCommon" . -}}
+{{ template "qemuNetDevTapCommon" . -}}
 `))
 
 // Devices use "lxd_" prefix indicating that this is a user named device.
-var qemuNetdevPhysical = 
template.Must(template.New("qemuNetdevPhysical").Parse(`
+var qemuNetDevPhysical = 
template.Must(template.New("qemuNetDevPhysical").Parse(`
 # Network card ("{{.devName}}" device)
 [device "dev-lxd_{{.devName}}"]
 {{- if eq .bus "pci" "pcie"}}

From 53d5f528f26ef5012308bf6832defafbfa9c2dbc Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 12 Jun 2020 10:11:19 +0100
Subject: [PATCH 7/9] lxd/instance/drivers/driver/qemu: Consistent net dev
 naming usage

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu.go 
b/lxd/instance/drivers/driver_qemu.go
index 7c12346a98..a7e9070af0 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -2005,7 +2005,7 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus 
*qemuBus, bootIndexes m
 
                // Append the tap device file path to the list of files to be 
opened and passed to qemu.
                tplFields["tapFD"] = vm.addFileDescriptor(fdFiles, 
fmt.Sprintf("/dev/tap%d", ifindex))
-               tpl = qemuNetdevTapFD
+               tpl = qemuNetDevTapFD
        } else if shared.PathExists(fmt.Sprintf("/sys/class/net/%s/tun_flags", 
nicName)) {
                // Detect TAP (via TUN driver) device.
                tplFields["ifName"] = nicName
@@ -2013,7 +2013,7 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus 
*qemuBus, bootIndexes m
        } else if pciSlotName != "" {
                // Detect physical passthrough device.
                tplFields["pciSlotName"] = pciSlotName
-               tpl = qemuNetdevPhysical
+               tpl = qemuNetDevPhysical
        }
 
        devBus, devAddr, multi := bus.allocate("")

From 873535e62cdece4f82bbe0f5cfa517904d91c9d4 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 12 Jun 2020 11:22:05 +0100
Subject: [PATCH 8/9] lxd/instance/drivers/driver/qemu/templates: Adds
 qemuGPUDevPhysical template

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_qemu_templates.go | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/lxd/instance/drivers/driver_qemu_templates.go 
b/lxd/instance/drivers/driver_qemu_templates.go
index 9f4c3ae179..52b9632003 100644
--- a/lxd/instance/drivers/driver_qemu_templates.go
+++ b/lxd/instance/drivers/driver_qemu_templates.go
@@ -416,3 +416,24 @@ bootindex = "{{.bootIndex}}"
 multifunction = "on"
 {{- end }}
 `))
+
+// Devices use "lxd_" prefix indicating that this is a user named device.
+var qemuGPUDevPhysical = 
template.Must(template.New("qemuGPUDevPhysical").Parse(`
+# GPU card ("{{.devName}}" device)
+[device "dev-lxd_{{.devName}}"]
+{{- if eq .bus "pci" "pcie"}}
+driver = "vfio-pci"
+bus = "{{.devBus}}"
+addr = "{{.devAddr}}"
+{{- end}}
+{{if eq .bus "ccw" -}}
+driver = "vfio-ccw"
+{{- end}}
+host = "{{.pciSlotName}}"
+{{if .vga -}}
+x-vga = "on"
+{{- end }}
+{{if .multifunction -}}
+multifunction = "on"
+{{- end }}
+`))

From 15e3eaf43f0717248d2c50d9ab2bebcd499db9bd Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 12 Jun 2020 11:22:23 +0100
Subject: [PATCH 9/9] lxd/instance/drivers/driver/qemu: Adds GPU passthrough
 support

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 85 +++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/lxd/instance/drivers/driver_qemu.go 
b/lxd/instance/drivers/driver_qemu.go
index a7e9070af0..58b8f0f978 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -1728,6 +1728,14 @@ func (vm *qemu) generateQemuConfigFile(busName string, 
devConfs []*deviceConfig.
                                return "", err
                        }
                }
+
+               // Add GPU device.
+               if len(runConf.GPUDevice) > 0 {
+                       err = vm.addGPUDevConfig(sb, bus, runConf.GPUDevice)
+                       if err != nil {
+                               return "", err
+                       }
+               }
        }
 
        // Write the agent mount config.
@@ -2027,6 +2035,83 @@ func (vm *qemu) addNetDevConfig(sb *strings.Builder, bus 
*qemuBus, bootIndexes m
        return fmt.Errorf("Unrecognised device type")
 }
 
+// addGPUDevConfig adds the qemu config required for adding a GPU device.
+func (vm *qemu) addGPUDevConfig(sb *strings.Builder, bus *qemuBus, gpuConfig 
[]deviceConfig.RunConfigItem) error {
+       var devName, pciSlotName string
+       for _, gpuItem := range gpuConfig {
+               if gpuItem.Key == "devName" {
+                       devName = gpuItem.Value
+               } else if gpuItem.Key == "pciSlotName" {
+                       pciSlotName = gpuItem.Value
+               }
+       }
+
+       devBus, devAddr, multi := bus.allocate(fmt.Sprintf("lxd_%s", devName))
+       tplFields := map[string]interface{}{
+               "bus":           bus.name,
+               "devBus":        devBus,
+               "devAddr":       devAddr,
+               "multifunction": multi,
+
+               "devName":     devName,
+               "pciSlotName": pciSlotName,
+               "vga":         true,
+       }
+
+       // Add main GPU device in VGA mode to qemu config.
+       err := qemuGPUDevPhysical.Execute(sb, tplFields)
+       if err != nil {
+               return err
+       }
+
+       // Add any other related IOMMU VFs as generic PCI devices.
+       iommuGroupPath := filepath.Join("/sys/bus/pci/devices", pciSlotName, 
"iommu_group", "devices")
+
+       if shared.PathExists(iommuGroupPath) {
+               // Extract parent slot name by removing any virtual function ID.
+               parts := strings.SplitN(pciSlotName, ".", 2)
+               prefix := parts[0]
+
+               // Iterate the members of the IOMMU group and override any that 
match the parent slot name prefix.
+               err := filepath.Walk(iommuGroupPath, func(path string, _ 
os.FileInfo, err error) error {
+                       if err != nil {
+                               return err
+                       }
+
+                       iommuSlotName := filepath.Base(path) // Virtual 
function's address is dir name.
+
+                       // Match any VFs that are related to the GPU device 
(but not the GPU device itself).
+                       if strings.HasPrefix(iommuSlotName, prefix) && 
iommuSlotName != pciSlotName {
+                               // Add VF device without VGA mode to qemu 
config.
+                               devBus, devAddr, multi := 
bus.allocate(fmt.Sprintf("lxd_%s", devName))
+                               tplFields := map[string]interface{}{
+                                       "bus":           bus.name,
+                                       "devBus":        devBus,
+                                       "devAddr":       devAddr,
+                                       "multifunction": multi,
+
+                                       // Generate associated device name by 
combining main device name and VF ID.
+                                       "devName":     fmt.Sprintf("%s_%s", 
devName, devAddr),
+                                       "pciSlotName": iommuSlotName,
+                                       "vga":         false,
+                               }
+
+                               err := qemuGPUDevPhysical.Execute(sb, tplFields)
+                               if err != nil {
+                                       return err
+                               }
+                       }
+
+                       return nil
+               })
+               if err != nil {
+                       return err
+               }
+       }
+
+       return nil
+}
+
 // pidFilePath returns the path where the qemu process should write its PID.
 func (vm *qemu) pidFilePath() string {
        return filepath.Join(vm.LogPath(), "qemu.pid")
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to