On 1/30/2026 1:43 PM, Pavel Hrdina wrote:
On Fri, Jan 30, 2026 at 10:59:14AM -0800, Nathan Chen via Devel wrote:
From: Nathan Chen<[email protected]>

Implement the IOMMU_OPTION_RLIMIT_MODE
ioctl to set per-process memory accounting for
iommufd. This prevents ENOMEM errors from the
default per-user memory accounting when multiple
VMs under the libvirt-qemu user have their pinned
memory summed and checked against a per-process
RLIMIT_MEMLOCK limit.

Signed-off-by: Nathan Chen<[email protected]>
---
  meson.build              |   1 +
  po/POTFILES              |   1 +
  src/libvirt_private.syms |   3 ++
  src/util/meson.build     |   1 +
  src/util/viriommufd.c    | 111 +++++++++++++++++++++++++++++++++++++++
  src/util/viriommufd.h    |  25 +++++++++
  6 files changed, 142 insertions(+)
  create mode 100644 src/util/viriommufd.c
  create mode 100644 src/util/viriommufd.h
[...]

diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c
new file mode 100644
index 0000000000..b44bc8ed1d
--- /dev/null
+++ b/src/util/viriommufd.c
@@ -0,0 +1,111 @@
+#include <config.h>
+
+#include "viriommufd.h"
+#include "virlog.h"
+#include "virerror.h"
+#include "virfile.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+VIR_LOG_INIT("util.iommufd");
+
+#ifdef __linux__
+
+# include <sys/ioctl.h>
+# include <linux/types.h>
+
+# ifdef HAVE_LINUX_IOMMUFD_H
+#  include <linux/iommufd.h>
+# endif
+
+# ifndef IOMMU_OPTION
+
+enum iommufd_option {
+    IOMMU_OPTION_RLIMIT_MODE = 0,
+    IOMMU_OPTION_HUGE_PAGES = 1,
+};
+
+enum iommufd_option_ops {
+    IOMMU_OPTION_OP_SET = 0,
+    IOMMU_OPTION_OP_GET = 1,
+};
+
+struct iommu_option {
+    __u32 size;
+    __u32 option_id;
+    __u16 op;
+    __u16 __reserved;
+    __u32 object_id;
+    __aligned_u64 val64;
+};
+
+#  define IOMMUFD_TYPE (';')
+#  define IOMMUFD_CMD_OPTION 0x87
+#  define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
+
+# endif
+
+/**
+ * virIOMMUFDSetRLimitMode:
+ * @fd: iommufd file descriptor
+ * @processAccounting: true for per-process, false for per-user
+ *
+ * Set RLIMIT_MEMLOCK accounting mode for the iommufd.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virIOMMUFDSetRLimitMode(int fd, bool processAccounting)
+{
+    struct iommu_option option = {
+        .size = sizeof(struct iommu_option),
+        .option_id = IOMMU_OPTION_RLIMIT_MODE,
+        .op = IOMMU_OPTION_OP_SET,
+        .__reserved = 0,
+        .object_id = 0,
+        .val64 = processAccounting ? 1 : 0,
+    };
+
+    if (ioctl(fd, IOMMU_OPTION, &option) < 0) {
+        switch (errno) {
+            case ENOTTY:
+                VIR_WARN("IOMMU_OPTION ioctl not supported");
+                return -1;
+
+            case EOPNOTSUPP:
+                VIR_WARN("IOMMU_OPTION_RLIMIT_MODE not supported by kernel");
+                return -1;
+
+            case EINVAL:
+                virReportSystemError(errno, "%s",
+                                     _("invalid iommufd option parameters"));
+                return -1;
+
+            case EPERM:
+                VIR_WARN("Permission denied for IOMMU_OPTION ioctl. "
+                         "Per-user-based memory accounting to be used by 
default.");
+                return -1;
+
+            default:
+                virReportSystemError(errno, "%s",
+                                     _("failed to set iommufd option"));
+                return -1;
+        }
+    }
When we return -1 we should also set error instead of logging warning.
I can fix it before pushing, there are two options:

  - We will keep the switch() and keep the customized messages and
    call virReportSystemError() for each case

  - Or we can remove the switch and call virReportSystemError that already
    adds string representation of errno:

      if (ioctl(fd, IOMMU_OPTION, &option) < 0) {
          virReportSystemError(errno, "%s",
                               _("failed to set memory accounting for 
iommufd"));
          return -1;
      }

Thanks, I think we can go with the second option of removing the switch; it would be simpler and matches the pattern for ioctl failures elsewhere in Libvirt.

Nathan

Reply via email to