From: Nathan Chen <[email protected]>

Use the IOMMU_OPTION ioctl with the
IOMMU_OPTION_RLIMIT_MODE option to switch iommufd
to per-process memory accounting. With the default
per-user accounting, the pinned memory of all VMs
running under the libvirt-qemu user is summed and
checked against a per-process RLIMIT_MEMLOCK limit,
which causes ENOMEM errors when multiple VMs run.

Signed-off-by: Nathan Chen <[email protected]>
---
 po/POTFILES              |  1 +
 src/libvirt_private.syms |  3 ++
 src/qemu/qemu_process.c  |  7 ++++
 src/util/meson.build     |  1 +
 src/util/viriommufd.c    | 89 ++++++++++++++++++++++++++++++++++++++++
 src/util/viriommufd.h    | 23 +++++++++++
 6 files changed, 124 insertions(+)
 create mode 100644 src/util/viriommufd.c
 create mode 100644 src/util/viriommufd.h

diff --git a/po/POTFILES b/po/POTFILES
index f0aad35c8c..c78d2b8000 100644
--- a/po/POTFILES
+++ b/po/POTFILES
@@ -303,6 +303,7 @@ src/util/virhostuptime.c
 src/util/viridentity.c
 src/util/virinhibitor.c
 src/util/virinitctl.c
+src/util/viriommufd.c
 src/util/viriscsi.c
 src/util/virjson.c
 src/util/virlease.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index ed2b0d381e..e2a7a16347 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2652,6 +2652,9 @@ virInhibitorRelease;
 virInitctlFifos;
 virInitctlSetRunLevel;
 
+# util/viriommufd.h
+virIOMMUFDSetRLimitMode;
+
 # util/viriscsi.h
 virISCSIConnectionLogin;
 virISCSIConnectionLogout;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 8863be2cb6..db56720f3d 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -104,6 +104,7 @@
 #include "backup_conf.h"
 #include "storage_file_probe.h"
 #include "virpci.h"
+#include "viriommufd.h"
 
 #include "logging/log_manager.h"
 #include "logging/log_protocol.h"
@@ -10392,6 +10393,12 @@ qemuProcessOpenIommuFd(virDomainObj *vm)
         return -1;
     }
 
+    /* Set per-process memory accounting */
+    if (virIOMMUFDSetRLimitMode(fd, true) < 0) {
+        VIR_FORCE_CLOSE(fd);
+        return -1;
+    }
+
     VIR_DEBUG("Opened IOMMU FD %d for domain %s", fd, vm->def->name);
     return fd;
 }
diff --git a/src/util/meson.build b/src/util/meson.build
index 4950a795cc..9fb0aa0fe7 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -46,6 +46,7 @@ util_sources = [
   'viridentity.c',
   'virinhibitor.c',
   'virinitctl.c',
+  'viriommufd.c',
   'viriscsi.c',
   'virjson.c',
   'virkeycode.c',
diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c
new file mode 100644
index 0000000000..163ac632ba
--- /dev/null
+++ b/src/util/viriommufd.c
@@ -0,0 +1,127 @@
+#include <config.h>
+
+#include "viriommufd.h"
+#include "virlog.h"
+#include "virerror.h"
+
+#ifdef __linux__
+# include <sys/ioctl.h>
+# include <linux/types.h>
+#endif
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+VIR_LOG_INIT("util.iommufd");
+
+#ifdef __linux__
+
+/*
+ * Local definitions for the iommufd IOMMU_OPTION ioctl, so that this file
+ * does not depend on the kernel's iommufd header being installed.
+ * NOTE(review): assumed to mirror <linux/iommufd.h>; keep them in sync.
+ */
+# define IOMMUFD_TYPE (';')
+
+# ifndef IOMMUFD_CMD_OPTION
+#  define IOMMUFD_CMD_OPTION 0x87
+# endif
+
+# ifndef IOMMU_OPTION
+#  define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
+# endif
+
+/* Option identifiers accepted in iommu_option.option_id. */
+enum iommufd_option {
+    IOMMU_OPTION_RLIMIT_MODE = 0,
+    IOMMU_OPTION_HUGE_PAGES = 1,
+};
+
+/* Operations accepted in iommu_option.op. */
+enum iommufd_option_ops {
+    IOMMU_OPTION_OP_SET = 0,
+    IOMMU_OPTION_OP_GET = 1,
+};
+
+/* Argument structure passed to the IOMMU_OPTION ioctl. */
+struct iommu_option {
+    __u32 size;
+    __u32 option_id;
+    __u16 op;
+    __u16 __reserved;
+    __u32 object_id;
+    __aligned_u64 val64;
+};
+
+/**
+ * virIOMMUFDSetRLimitMode:
+ * @fd: iommufd file descriptor
+ * @processAccounting: true for per-process, false for per-user
+ *
+ * Set RLIMIT_MEMLOCK accounting mode for the iommufd.
+ *
+ * ENOTTY and EOPNOTSUPP from the ioctl are treated as "not supported":
+ * a warning is logged and 0 is returned. Any other ioctl failure is
+ * reported as an error.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virIOMMUFDSetRLimitMode(int fd, bool processAccounting)
+{
+    struct iommu_option option = {
+        .size = sizeof(struct iommu_option),
+        .option_id = IOMMU_OPTION_RLIMIT_MODE,
+        .op = IOMMU_OPTION_OP_SET,
+        .__reserved = 0,
+        .object_id = 0,
+        .val64 = processAccounting ? 1 : 0,
+    };
+
+    if (ioctl(fd, IOMMU_OPTION, &option) < 0) {
+        switch (errno) {
+        case ENOTTY:
+            VIR_WARN("IOMMU_OPTION ioctl not supported");
+            return 0;
+
+        case EOPNOTSUPP:
+            VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel");
+            return 0;
+
+        case EINVAL:
+            virReportSystemError(errno, "%s",
+                                 _("invalid iommufd option parameters"));
+            return -1;
+
+        default:
+            virReportSystemError(errno, "%s",
+                                 _("failed to set iommufd option"));
+            return -1;
+        }
+    }
+
+    VIR_DEBUG("Set iommufd rlimit mode to %s-based accounting",
+              processAccounting ? "process" : "user");
+    return 0;
+}
+
+#else /* !__linux__ */
+
+/**
+ * virIOMMUFDSetRLimitMode:
+ * @fd: iommufd file descriptor (unused)
+ * @processAccounting: requested accounting mode (unused)
+ *
+ * Stub used when not building for Linux, where the ioctl is unavailable.
+ *
+ * Returns: -1, after reporting ENOSYS
+ */
+int
+virIOMMUFDSetRLimitMode(int fd G_GNUC_UNUSED,
+                        bool processAccounting G_GNUC_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("iommufd is not supported on this platform"));
+    return -1;
+}
+
+#endif /* !__linux__ */
diff --git a/src/util/viriommufd.h b/src/util/viriommufd.h
new file mode 100644
index 0000000000..1a7c7c94d0
--- /dev/null
+++ b/src/util/viriommufd.h
@@ -0,0 +1,23 @@
+/*
+ * viriommufd.h: iommufd helpers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "internal.h"
+
+int virIOMMUFDSetRLimitMode(int fd, bool processAccounting);
-- 
2.43.0

Reply via email to