If a domain has an NVMe disk configured, then we need to allow it
on devices CGroup so that qemu can access it. There is one caveat
though - if an NVMe disk is read only we need CGroup to allow
write too. This is because when opening the device, qemu does
couple of ioctl()-s which are considered as write.

Signed-off-by: Michal Privoznik <mpriv...@redhat.com>
---
 src/qemu/qemu_cgroup.c | 101 +++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 29 deletions(-)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 9684bf3662..b9fb0ebca2 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -119,10 +119,30 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm,
                              virStorageSourcePtr src,
                              bool forceReadonly)
 {
-    if (!src->path || !virStorageSourceIsLocalStorage(src)) {
-        VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
-                  NULLSTR(src->path), virStorageTypeToString(src->type));
-        return 0;
+    VIR_AUTOFREE(char *) path = NULL;
+    bool readonly = src->readonly || forceReadonly;
+
+    if (src->type == VIR_STORAGE_TYPE_NVME) {
+        /* Even though disk is R/O we can't make it so in
+         * CGroups. QEMU will try to do some ioctl()-s over the
+         * device and such operations are considered R/W by the
+         * kernel */
+        readonly = false;
+
+        if (!(path = virPCIDeviceAddressGetIOMMUGroupDev(&src->nvme->pciAddr)))
+            return -1;
+
+        if (qemuSetupImagePathCgroup(vm, QEMU_DEV_VFIO, false) < 0)
+            return -1;
+    } else {
+        if (!src->path || !virStorageSourceIsLocalStorage(src)) {
+            VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
+                      NULLSTR(src->path), virStorageTypeToString(src->type));
+            return 0;
+        }
+
+        if (VIR_STRDUP(path, src->path) < 0)
+            return -1;
     }
 
     if (virStoragePRDefIsManaged(src->pr) &&
@@ -130,7 +150,7 @@ qemuSetupImageCgroupInternal(virDomainObjPtr vm,
         qemuSetupImagePathCgroup(vm, QEMU_DEVICE_MAPPER_CONTROL_PATH, false) < 
0)
         return -1;
 
-    return qemuSetupImagePathCgroup(vm, src->path, src->readonly || 
forceReadonly);
+    return qemuSetupImagePathCgroup(vm, path, readonly);
 }
 
 
@@ -147,7 +167,10 @@ qemuTeardownImageCgroup(virDomainObjPtr vm,
                         virStorageSourcePtr src)
 {
     qemuDomainObjPrivatePtr priv = vm->privateData;
+    VIR_AUTOFREE(char *path) = NULL;
     int perms = VIR_CGROUP_DEVICE_RWM;
+    bool hasPR = false;
+    bool hasNVMe = false;
     size_t i;
     int ret;
 
@@ -155,41 +178,61 @@ qemuTeardownImageCgroup(virDomainObjPtr vm,
                                 VIR_CGROUP_CONTROLLER_DEVICES))
         return 0;
 
-    if (!src->path || !virStorageSourceIsLocalStorage(src)) {
-        VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
-                  NULLSTR(src->path), virStorageTypeToString(src->type));
-        return 0;
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virStorageSourcePtr diskSrc = vm->def->disks[i]->src;
+
+        if (src == diskSrc)
+            continue;
+
+        if (virStoragePRDefIsManaged(diskSrc->pr))
+            hasPR = true;
+
+        if (virStorageSourceChainHasNVMe(diskSrc))
+            hasNVMe = true;
     }
 
-    if (virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) {
-        for (i = 0; i < vm->def->ndisks; i++) {
-            virStorageSourcePtr diskSrc = vm->def->disks[i]->src;
+    if (src->type == VIR_STORAGE_TYPE_NVME) {
+        if (!(path = virPCIDeviceAddressGetIOMMUGroupDev(&src->nvme->pciAddr)))
+            return -1;
 
-            if (src == diskSrc)
-                continue;
-
-            if (virStoragePRDefIsManaged(diskSrc->pr))
-                break;
-        }
-
-        if (i == vm->def->ndisks) {
-            VIR_DEBUG("Disabling device mapper control");
-            ret = virCgroupDenyDevicePath(priv->cgroup,
-                                          QEMU_DEVICE_MAPPER_CONTROL_PATH,
-                                          perms, true);
+        if (!hasNVMe &&
+            !qemuDomainNeedsVFIO(vm->def)) {
+            ret = virCgroupDenyDevicePath(priv->cgroup, QEMU_DEV_VFIO, perms, 
true);
             virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
-                                     QEMU_DEVICE_MAPPER_CONTROL_PATH,
+                                     QEMU_DEV_VFIO,
                                      virCgroupGetDevicePermsString(perms), 
ret);
             if (ret < 0)
-                return ret;
+                return -1;
         }
+    } else {
+        if (!src->path || !virStorageSourceIsLocalStorage(src)) {
+            VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
+                      NULLSTR(src->path), virStorageTypeToString(src->type));
+            return 0;
+        }
+
+        if (VIR_STRDUP(path, src->path) < 0)
+            return -1;
+    }
+
+    if (!hasPR &&
+        virFileExists(QEMU_DEVICE_MAPPER_CONTROL_PATH)) {
+        VIR_DEBUG("Disabling device mapper control");
+        ret = virCgroupDenyDevicePath(priv->cgroup,
+                                      QEMU_DEVICE_MAPPER_CONTROL_PATH,
+                                      perms, true);
+        virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
+                                 QEMU_DEVICE_MAPPER_CONTROL_PATH,
+                                 virCgroupGetDevicePermsString(perms), ret);
+        if (ret < 0)
+            return ret;
     }
 
-    VIR_DEBUG("Deny path %s", src->path);
+    VIR_DEBUG("Deny path %s", path);
 
-    ret = virCgroupDenyDevicePath(priv->cgroup, src->path, perms, true);
+    ret = virCgroupDenyDevicePath(priv->cgroup, path, perms, true);
 
-    virDomainAuditCgroupPath(vm, priv->cgroup, "deny", src->path,
+    virDomainAuditCgroupPath(vm, priv->cgroup, "deny", path,
                              virCgroupGetDevicePermsString(perms), ret);
 
     /* If you're looking for a counter part to
-- 
2.21.0

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to