[PATCH] tests: Fix libxlxml2domconfigtest

2022-11-10 Thread Jim Fehlig
Downstream CI recently encountered failures of libxlxml2domconfigtest when
building libvirt packages against Xen 4.17 rc3 packages. The test fails on
vnuma_hvm config, where suddently the actual json produced by
libxl_domain_config_to_json() contains a 'pnode' entry in the 'vnuma_nodes'
list, which is absent in the expected json. It appears the test has thus far
passed by luck. E.g. I was able to make the test pass in the failing
environment by changing the meson buildtype from debugoptimized to debug.

When a VM config contains vnuma settings, libxlMakeVnumaList() checks if the
number of requested vnuma nodes exceeds the number of physical nodes. The
number of physical nodes is retrieved with libxl_get_physinfo(), which can
return wildly different results in the context of unit tests. This change
mocks libxl_get_physinfo() to return consistent results. All fields of the
libxl_physinfo struct are set to 0 except nr_nodes, which is set to 6 to
ensure the vnuma_hvm configuration is properly tested.

Signed-off-by: Jim Fehlig 
---
 tests/libxlmock.c   | 15 +++
 tests/libxlxml2domconfigdata/vnuma-hvm.json |  5 +
 2 files changed, 20 insertions(+)

diff --git a/tests/libxlmock.c b/tests/libxlmock.c
index 4754597e5b..205d34df19 100644
--- a/tests/libxlmock.c
+++ b/tests/libxlmock.c
@@ -70,6 +70,21 @@ VIR_MOCK_IMPL_RET_ARGS(libxl_get_version_info,
 return 
 }
 
+VIR_MOCK_IMPL_RET_ARGS(libxl_get_physinfo,
+   int,
+   libxl_ctx *, ctx,
+   libxl_physinfo *, physinfo)
+{
+memset(physinfo, 0, sizeof(*physinfo));
+physinfo->nr_nodes = 6;
+
+/* silence gcc warning about unused function */
+if (0)
+real_libxl_get_physinfo(ctx, physinfo);
+
+return 0;
+}
+
 VIR_MOCK_STUB_RET_ARGS(libxl_get_free_memory,
int, 0,
libxl_ctx *, ctx,
diff --git a/tests/libxlxml2domconfigdata/vnuma-hvm.json 
b/tests/libxlxml2domconfigdata/vnuma-hvm.json
index 2556c82d5f..c90ee823a4 100644
--- a/tests/libxlxml2domconfigdata/vnuma-hvm.json
+++ b/tests/libxlxml2domconfigdata/vnuma-hvm.json
@@ -39,6 +39,7 @@
 41,
 51
 ],
+   "pnode": 1,
 "vcpus": [
 1
 ]
@@ -53,6 +54,7 @@
 31,
 41
 ],
+   "pnode": 2,
 "vcpus": [
 2
 ]
@@ -67,6 +69,7 @@
 21,
 31
 ],
+   "pnode": 3,
 "vcpus": [
 3
 ]
@@ -81,6 +84,7 @@
 10,
 21
 ],
+   "pnode": 4,
 "vcpus": [
 4
 ]
@@ -95,6 +99,7 @@
 21,
 10
 ],
+   "pnode": 5,
 "vcpus": [
 5
 ]
-- 
2.37.3



[libvirt][PATCH v17 8/9] security_dac: Set DAC label on SGX /dev nodes

2022-11-10 Thread Lin Yang
From: Michal Privoznik 

As advertised in previous commits, QEMU needs to access
/dev/sgx_vepc and /dev/sgx_provision files when SGX memory
backend is configured. And if it weren't for QEMU's namespaces,
we wouldn't dare to relabel them, because they are system wide
files. But if namespaces are used, then we can set label on
domain's private copies, just like we do for /dev/sev.

Signed-off-by: Michal Privoznik 
Signed-off-by: Haibin Huang 
---
 src/security/security_dac.c | 46 ++---
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index d94995c9cf..5ca63e30f4 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -48,6 +48,8 @@ VIR_LOG_INIT("security.security_dac");
 
 #define SECURITY_DAC_NAME "dac"
 #define DEV_SEV "/dev/sev"
+#define DEV_SGX_VEPC "/dev/sgx_vepc"
+#define DEV_SGX_PROVISION "/dev/sgx_provision"
 
 typedef struct _virSecurityDACData virSecurityDACData;
 struct _virSecurityDACData {
@@ -1843,24 +1845,24 @@ virSecurityDACRestoreMemoryLabel(virSecurityManager 
*mgr,
  virDomainDef *def G_GNUC_UNUSED,
  virDomainMemoryDef *mem)
 {
-int ret = -1;
-
 switch (mem->model) {
 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
-ret = virSecurityDACRestoreFileLabel(mgr, mem->nvdimmPath);
+return virSecurityDACRestoreFileLabel(mgr, mem->nvdimmPath);
+
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+/* We set label on SGX /dev nodes iff running with namespaces, so we
+ * don't need to restore anything. */
 break;
 
 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
-case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
 case VIR_DOMAIN_MEMORY_MODEL_LAST:
 case VIR_DOMAIN_MEMORY_MODEL_NONE:
-ret = 0;
 break;
 }
 
-return ret;
+return 0;
 }
 
 
@@ -2020,35 +2022,43 @@ virSecurityDACSetMemoryLabel(virSecurityManager *mgr,
 {
 virSecurityDACData *priv = virSecurityManagerGetPrivateData(mgr);
 virSecurityLabelDef *seclabel;
-int ret = -1;
 uid_t user;
 gid_t group;
 
+seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_DAC_NAME);
+if (seclabel && !seclabel->relabel)
+return 0;
+
+if (virSecurityDACGetIds(seclabel, priv, , , NULL, NULL) < 0)
+return -1;
+
 switch (mem->model) {
 case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
-seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_DAC_NAME);
-if (seclabel && !seclabel->relabel)
-return 0;
+return virSecurityDACSetOwnership(mgr, NULL,
+  mem->nvdimmPath,
+  user, group, true);
 
-if (virSecurityDACGetIds(seclabel, priv, , , NULL, NULL) < 
0)
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+/* Skip chowning SGX if namespaces are disabled. */
+if (priv->mountNamespace &&
+(virSecurityDACSetOwnership(mgr, NULL,
+DEV_SGX_VEPC,
+user, group, true) < 0 ||
+ virSecurityDACSetOwnership(mgr, NULL,
+DEV_SGX_PROVISION,
+user, group, true) < 0))
 return -1;
-
-ret = virSecurityDACSetOwnership(mgr, NULL,
- mem->nvdimmPath,
- user, group, true);
 break;
 
 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
-case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
 case VIR_DOMAIN_MEMORY_MODEL_LAST:
 case VIR_DOMAIN_MEMORY_MODEL_NONE:
-ret = 0;
 break;
 }
 
-return ret;
+return 0;
 }
 
 
-- 
2.25.1



[libvirt][PATCH v17 9/9] qemu: Add command-line to generate SGX EPC memory backend

2022-11-10 Thread Lin Yang
According to the result parsing from xml, add the argument of
SGX EPC memory backend into QEMU command line.

$ qemu-system-x86_64 \
.. \
-object 
'{"qom-type":"memory-backend-epc","id":"memepc0","prealloc":true,"size":67108864,"host-nodes":[0,1],"policy":"bind"}'
 \
-object 
'{"qom-type":"memory-backend-epc","id":"memepc1","prealloc":true,"size":16777216,"host-nodes":[2,3],"policy":"bind"}'
 \
-machine 
sgx-epc.0.memdev=memepc0,sgx-epc.0.node=0,sgx-epc.1.memdev=memepc1,sgx-epc.1.node=1

Signed-off-by: Lin Yang 
Signed-off-by: Haibin Huang 
Signed-off-by: Michal Privoznik 
---
 src/qemu/qemu_alias.c |  3 +-
 src/qemu/qemu_command.c   | 65 ---
 src/qemu/qemu_monitor_json.c  | 41 ++--
 src/qemu/qemu_validate.c  | 32 +
 .../sgx-epc.x86_64-7.0.0.args | 40 
 tests/qemuxml2argvtest.c  |  2 +
 6 files changed, 167 insertions(+), 16 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/sgx-epc.x86_64-7.0.0.args

diff --git a/src/qemu/qemu_alias.c b/src/qemu/qemu_alias.c
index 6061dd3f02..ef8e87ab58 100644
--- a/src/qemu/qemu_alias.c
+++ b/src/qemu/qemu_alias.c
@@ -464,7 +464,8 @@ qemuDeviceMemoryGetAliasID(virDomainDef *def,
  * valid */
 if (!oldAlias &&
 mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM &&
-mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM)
+mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM &&
+mem->model != VIR_DOMAIN_MEMORY_MODEL_SGX_EPC)
 return mem->info.addr.dimm.slot;
 
 for (i = 0; i < def->nmems; i++) {
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 4ef979e278..41b9f7cb52 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3307,7 +3307,11 @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps,
 
 props = virJSONValueNewObject();
 
-if (!mem->nvdimmPath &&
+if (mem->model == VIR_DOMAIN_MEMORY_MODEL_SGX_EPC) {
+backendType = "memory-backend-epc";
+if (!priv->memPrealloc)
+prealloc = true;
+} else if (!mem->nvdimmPath &&
 def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_MEMFD) {
 backendType = "memory-backend-memfd";
 
@@ -3322,7 +3326,6 @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps,
 
 if (systemMemory)
 disableCanonicalPath = true;
-
 } else if (useHugepage || mem->nvdimmPath || memAccess ||
 def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
 
@@ -6600,6 +6603,8 @@ qemuAppendDomainMemoryMachineParams(virBuffer *buf,
 virQEMUCaps *qemuCaps)
 {
 virTristateSwitch dump = def->mem.dump_core;
+bool nvdimmAdded = false;
+int epcNum = 0;
 size_t i;
 
 if (dump == VIR_TRISTATE_SWITCH_ABSENT)
@@ -6611,8 +6616,36 @@ qemuAppendDomainMemoryMachineParams(virBuffer *buf,
 virBufferAddLit(buf, ",mem-merge=off");
 
 for (i = 0; i < def->nmems; i++) {
-if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) {
-virBufferAddLit(buf, ",nvdimm=on");
+int targetNode = def->mems[i]->targetNode;
+
+switch (def->mems[i]->model) {
+case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
+if (!nvdimmAdded) {
+virBufferAddLit(buf, ",nvdimm=on");
+nvdimmAdded = true;
+}
+break;
+
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+/* add sgx epc memory to -machine parameter */
+
+if (targetNode < 0) {
+/* set NUMA target node to 0 by default if user doesn't
+ * specify it. */
+targetNode = 0;
+}
+
+virBufferAsprintf(buf, 
",sgx-epc.%d.memdev=mem%s,sgx-epc.%d.node=%d",
+  epcNum, def->mems[i]->info.alias, epcNum, 
targetNode);
+
+epcNum++;
+break;
+
+case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
+case VIR_DOMAIN_MEMORY_MODEL_NONE:
+case VIR_DOMAIN_MEMORY_MODEL_LAST:
 break;
 }
 }
@@ -7322,11 +7355,27 @@ qemuBuildMemoryDeviceCommandLine(virCommand *cmd,
 if (qemuBuildMemoryDimmBackendStr(cmd, def->mems[i], def, cfg, priv) < 
0)
 return -1;
 
-if (!(props = qemuBuildMemoryDeviceProps(cfg, priv, def, 
def->mems[i])))
-return -1;
+switch (def->mems[i]->model) {
+case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
+case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
+if (!(props = qemuBuildMemoryDeviceProps(cfg, priv, def, 
def->mems[i])))
+return -1;
 
-if (qemuBuildDeviceCommandlineFromJSON(cmd, props, def, 
priv->qemuCaps) < 0)
-

[libvirt][PATCH v17 5/9] conf: Introduce SGX EPC element into device memory xml

2022-11-10 Thread Lin Yang

  ...
  

  0-1


  512
  0

  
  ...


Signed-off-by: Lin Yang 
Signed-off-by: Michal Privoznik 
Signed-off-by: Haibin Huang 
---
 docs/formatdomain.rst | 25 ++-
 src/conf/domain_conf.c| 30 +
 src/conf/domain_conf.h|  1 +
 src/conf/domain_postparse.c   |  1 +
 src/conf/domain_validate.c|  9 +++
 src/conf/schemas/domaincommon.rng |  1 +
 src/qemu/qemu_alias.c |  3 +
 src/qemu/qemu_command.c   |  1 +
 src/qemu/qemu_domain.c| 28 ++--
 src/qemu/qemu_domain_address.c|  6 ++
 src/qemu/qemu_driver.c|  1 +
 src/qemu/qemu_process.c   |  2 +
 src/qemu/qemu_validate.c  |  8 +++
 src/security/security_apparmor.c  |  1 +
 src/security/security_dac.c   |  2 +
 src/security/security_selinux.c   |  2 +
 tests/qemuxml2argvdata/sgx-epc.xml| 65 +++
 .../sgx-epc.x86_64-7.0.0.xml  | 65 +++
 tests/qemuxml2xmltest.c   |  2 +
 19 files changed, 248 insertions(+), 5 deletions(-)
 create mode 100644 tests/qemuxml2argvdata/sgx-epc.xml
 create mode 100644 tests/qemuxml2xmloutdata/sgx-epc.x86_64-7.0.0.xml

diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index 488b6be862..d7fffc6e0b 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -8002,6 +8002,20 @@ Example: usage of the memory devices
  524288

  
+ 
+   
+ 0-1
+   
+   
+ 16384
+ 0
+   
+ 
+ 
+   
+ 16384
+   
+ 

...
 
@@ -8010,7 +8024,9 @@ Example: usage of the memory devices
1.2.14` Provide ``nvdimm`` model that adds a Non-Volatile DIMM module.
:since:`Since 3.2.0` Provide ``virtio-pmem`` model to add a paravirtualized
persistent memory device. :since:`Since 7.1.0` Provide ``virtio-mem`` model
-   to add paravirtualized memory device. :since:`Since 7.9.0`
+   to add paravirtualized memory device. :since:`Since 7.9.0` Provide
+   ``sgx-epc`` model to add a SGX enclave page cache (EPC) memory to the guest.
+   :since:`Since 8.10.0 and QEMU 7.0.0`
 
 ``access``
An optional attribute ``access`` ( :since:`since 3.2.0` ) that provides
@@ -8070,6 +8086,13 @@ Example: usage of the memory devices
  Represents a path in the host that backs the virtio memory module in the
  guest. It is mandatory.
 
+   For model ``sgx-epc`` this element is optional. The following optional
+   elements may be used:
+
+   ``nodemask``
+  This element can be used to override the default set of NUMA nodes where
+  the memory would be allocated. :since:`Since 8.10.0 and QEMU 7.0.0`
+
 ``target``
The mandatory ``target`` element configures the placement and sizing of the
added memory from the perspective of the guest.
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index cbc08064c4..622f83590f 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -1444,6 +1444,7 @@ VIR_ENUM_IMPL(virDomainMemoryModel,
   "nvdimm",
   "virtio-pmem",
   "virtio-mem",
+  "sgx-epc",
 );
 
 VIR_ENUM_IMPL(virDomainShmemModel,
@@ -13054,6 +13055,20 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node,
 def->nvdimmPath = virXPathString("string(./path)", ctxt);
 break;
 
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+if ((nodemask = virXPathString("string(./nodemask)", ctxt))) {
+if (virBitmapParse(nodemask, >sourceNodes,
+   VIR_DOMAIN_CPUMASK_LEN) < 0)
+return -1;
+
+if (virBitmapIsAllClear(def->sourceNodes)) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _("Invalid value of 'nodemask': %s"), nodemask);
+return -1;
+}
+}
+break;
+
 case VIR_DOMAIN_MEMORY_MODEL_NONE:
 case VIR_DOMAIN_MEMORY_MODEL_LAST:
 break;
@@ -13122,6 +13137,7 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node,
 case VIR_DOMAIN_MEMORY_MODEL_NONE:
 case VIR_DOMAIN_MEMORY_MODEL_DIMM:
 case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
 case VIR_DOMAIN_MEMORY_MODEL_LAST:
 break;
 }
@@ -14918,6 +14934,11 @@ virDomainMemoryFindByDefInternal(virDomainDef *def,
 continue;
 break;
 
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes))
+continue;
+break;
+
 case VIR_DOMAIN_MEMORY_MODEL_NONE:
 case VIR_DOMAIN_MEMORY_MODEL_LAST:
 break;
@@ -24535,6 +24556,15 @@ virDomainMemorySourceDefFormat(virBuffer *buf,

[libvirt][PATCH v17 7/9] qemu_namespace: Create SGX related nodes in domain's namespace

2022-11-10 Thread Lin Yang
From: Michal Privoznik 

This is similar to the previous commit. SGX memory backend needs
to access /dev/sgx_vepc and /dev/sgx_provision. Create these
nodes in domain's private /dev when required by domain's config.

Signed-off-by: Michal Privoznik 
Signed-off-by: Haibin Huang 
---
 src/qemu/qemu_namespace.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c
index 8189cc37ba..90c0b90024 100644
--- a/src/qemu/qemu_namespace.c
+++ b/src/qemu/qemu_namespace.c
@@ -370,11 +370,23 @@ static int
 qemuDomainSetupMemory(virDomainMemoryDef *mem,
   GSList **paths)
 {
-if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
-mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM)
-return 0;
+switch (mem->model) {
+case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+*paths = g_slist_prepend(*paths, g_strdup(mem->nvdimmPath));
+break;
+
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SGX_VEPVC));
+*paths = g_slist_prepend(*paths, g_strdup(QEMU_DEV_SGX_PROVISION));
+break;
 
-*paths = g_slist_prepend(*paths, g_strdup(mem->nvdimmPath));
+case VIR_DOMAIN_MEMORY_MODEL_NONE:
+case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
+case VIR_DOMAIN_MEMORY_MODEL_LAST:
+break;
+}
 
 return 0;
 }
-- 
2.25.1



[libvirt][PATCH v17 4/9] conf: expose SGX feature in domain capabilities

2022-11-10 Thread Lin Yang
From: Haibin Huang 

Extend hypervisor capabilities to include sgx feature. When available,
the hypervisor supports launching an VM with SGX on Intel platfrom.
The SGX feature tag privides additional details like section size and
sgx1 or sgx2.

Signed-off-by: Haibin Huang 
Signed-off-by: Michal Privoznik 
---
 docs/formatdomaincaps.rst | 40 +
 src/conf/domain_capabilities.c| 36 
 src/conf/schemas/domaincaps.rng   | 43 +++
 src/qemu/qemu_capabilities.c  | 16 +++
 tests/domaincapsdata/bhyve_basic.x86_64.xml   |  1 +
 tests/domaincapsdata/bhyve_fbuf.x86_64.xml|  1 +
 tests/domaincapsdata/bhyve_uefi.x86_64.xml|  1 +
 tests/domaincapsdata/empty.xml|  1 +
 tests/domaincapsdata/libxl-xenfv.xml  |  1 +
 tests/domaincapsdata/libxl-xenpv.xml  |  1 +
 .../domaincapsdata/qemu_4.2.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_4.2.0-tcg.x86_64.xml  |  1 +
 .../qemu_4.2.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_4.2.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_4.2.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_4.2.0.s390x.xml |  1 +
 tests/domaincapsdata/qemu_4.2.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_5.0.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_5.0.0-tcg.x86_64.xml  |  1 +
 .../qemu_5.0.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_5.0.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_5.0.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_5.0.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_5.1.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_5.1.0-tcg.x86_64.xml  |  1 +
 tests/domaincapsdata/qemu_5.1.0.sparc.xml |  1 +
 tests/domaincapsdata/qemu_5.1.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_5.2.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_5.2.0-tcg.x86_64.xml  |  1 +
 .../qemu_5.2.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_5.2.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_5.2.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_5.2.0.s390x.xml |  1 +
 tests/domaincapsdata/qemu_5.2.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_6.0.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_6.0.0-tcg.x86_64.xml  |  1 +
 .../qemu_6.0.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_6.0.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_6.0.0.s390x.xml |  1 +
 tests/domaincapsdata/qemu_6.0.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_6.1.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_6.1.0-tcg.x86_64.xml  |  1 +
 tests/domaincapsdata/qemu_6.1.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_6.2.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_6.2.0-tcg.x86_64.xml  |  1 +
 .../qemu_6.2.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_6.2.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_6.2.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_6.2.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_7.0.0-q35.x86_64.xml  | 10 +
 .../domaincapsdata/qemu_7.0.0-tcg.x86_64.xml  | 10 +
 .../qemu_7.0.0-virt.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_7.0.0.aarch64.xml   |  1 +
 tests/domaincapsdata/qemu_7.0.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_7.0.0.x86_64.xml| 10 +
 .../domaincapsdata/qemu_7.1.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_7.1.0-tcg.x86_64.xml  |  1 +
 tests/domaincapsdata/qemu_7.1.0.ppc64.xml |  1 +
 tests/domaincapsdata/qemu_7.1.0.x86_64.xml|  1 +
 .../domaincapsdata/qemu_7.2.0-q35.x86_64.xml  |  1 +
 .../domaincapsdata/qemu_7.2.0-tcg.x86_64.xml  |  1 +
 tests/domaincapsdata/qemu_7.2.0.x86_64.xml|  1 +
 62 files changed, 220 insertions(+)

diff --git a/docs/formatdomaincaps.rst b/docs/formatdomaincaps.rst
index f95d3a7083..532fea0f60 100644
--- a/docs/formatdomaincaps.rst
+++ b/docs/formatdomaincaps.rst
@@ -614,6 +614,16 @@ capabilities. All features occur as children of the main 
``features`` element.
  47
  1

+   
+ no
+ yes
+ no
+ 524288
+ 
+   
+   
+ 
+   
  

 
@@ -693,3 +703,33 @@ in domain XML `__
 ``maxESGuests``
The maximum number of SEV-ES guests that can be launched on the host. This
value may be configurable in the firmware for some hosts.
+
+SGX capabilities
+
+
+Intel Software Guard Extensions (Intel SGX) capabilities are exposed under the
+``sgx`` element.
+
+Intel SGX helps protect data in use via unique application isolation 
technology.
+Protect selected code and data from modification using hardened enclaves with
+Intel SGX.
+
+For more details on the SGX feature, please follow resources in the SGX 
developer's
+document store. In order to use SGX with libvirt have a look at `SGX in domain 
XML
+`__
+
+``flc``
+   FLC (Flexible Launch Control), not strictly part of SGX2, but was not part 
of
+   original 

[libvirt][PATCH v17 2/9] qemu: Get SGX capabilities form QMP

2022-11-10 Thread Lin Yang
From: Haibin Huang 

Generate the QMP command for query-sgx-capabilities and the command
return SGX capabilities from QMP.

{"execute":"query-sgx-capabilities"}

the right reply:
  {"return":
{
  "sgx": true,
  "section-size": 197132288,
  "flc": true
}
  }

the error reply:
  {"error":
{"class": "GenericError", "desc": "SGX is not enabled in KVM"}
  }

Signed-off-by: Haibin Huang 
Signed-off-by: Michal Privoznik 
---
 src/qemu/qemu_monitor.c  |  10 
 src/qemu/qemu_monitor.h  |   3 +
 src/qemu/qemu_monitor_json.c | 113 +++
 src/qemu/qemu_monitor_json.h |   4 ++
 4 files changed, 130 insertions(+)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index cd9f43071e..8ada16154c 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -3418,6 +3418,16 @@ qemuMonitorGetSEVCapabilities(qemuMonitor *mon,
 }
 
 
+int
+qemuMonitorGetSGXCapabilities(qemuMonitor *mon,
+  virSGXCapability **capabilities)
+{
+QEMU_CHECK_MONITOR(mon);
+
+return qemuMonitorJSONGetSGXCapabilities(mon, capabilities);
+}
+
+
 int
 qemuMonitorNBDServerStart(qemuMonitor *mon,
   const virStorageNetHostDef *server,
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 62f1bc1299..c690fc3655 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -823,6 +823,9 @@ int qemuMonitorGetGICCapabilities(qemuMonitor *mon,
 int qemuMonitorGetSEVCapabilities(qemuMonitor *mon,
   virSEVCapability **capabilities);
 
+int qemuMonitorGetSGXCapabilities(qemuMonitor *mon,
+  virSGXCapability **capabilities);
+
 typedef enum {
   QEMU_MONITOR_MIGRATE_RESUME   = 1 << 0, /* resume failed post-copy 
migration */
   QEMU_MONITOR_MIGRATION_FLAGS_LAST
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 74967afb24..fa44e07c2b 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -6025,6 +6025,119 @@ qemuMonitorJSONGetSEVCapabilities(qemuMonitor *mon,
 return 1;
 }
 
+
+/**
+ * qemuMonitorJSONGetSGXCapabilities:
+ * @mon: qemu monitor object
+ * @capabilities: pointer to pointer to a SGX capability structure to be filled
+ *
+ * This function queries and fills in INTEL's SGX platform-specific data.
+ * Note that from QEMU's POV both -object sgx-epc and query-sgx-capabilities
+ * can be present even if SGX is not available, which basically leaves us with
+ * checking for JSON "GenericError" in order to differentiate between 
compiled-in
+ * support and actual SGX support on the platform.
+ *
+ * Returns: -1 on error,
+ *   0 if SGX is not supported, and
+ *   1 if SGX is supported on the platform.
+ */
+int
+qemuMonitorJSONGetSGXCapabilities(qemuMonitor *mon,
+  virSGXCapability **capabilities)
+{
+g_autoptr(virJSONValue) cmd = NULL;
+g_autoptr(virJSONValue) reply = NULL;
+g_autoptr(virSGXCapability) capability = NULL;
+unsigned long long section_size_sum = 0;
+virJSONValue *sgxSections = NULL;
+virJSONValue *caps;
+size_t i;
+
+*capabilities = NULL;
+capability = g_new0(virSGXCapability, 1);
+
+if (!(cmd = qemuMonitorJSONMakeCommand("query-sgx-capabilities", NULL)))
+return -1;
+
+if (qemuMonitorJSONCommand(mon, cmd, ) < 0)
+return -1;
+
+/* QEMU has only compiled-in support of SGX */
+if (qemuMonitorJSONHasError(reply, "GenericError"))
+return 0;
+
+if (qemuMonitorJSONCheckError(cmd, reply) < 0)
+return -1;
+
+caps = virJSONValueObjectGetObject(reply, "return");
+
+if (virJSONValueObjectGetBoolean(caps, "flc", >flc) < 0) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("query-sgx-capabilities reply was missing 'flc' 
field"));
+return -1;
+}
+
+if (virJSONValueObjectGetBoolean(caps, "sgx1", >sgx1) < 0) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("query-sgx-capabilities reply was missing 'sgx1' 
field"));
+return -1;
+}
+
+if (virJSONValueObjectGetBoolean(caps, "sgx2", >sgx2) < 0) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("query-sgx-capabilities reply was missing 'sgx2' 
field"));
+return -1;
+}
+
+if ((sgxSections = virJSONValueObjectGetArray(caps, "sections"))) {
+/* SGX EPC sections info was added since QEMU 7.0.0 */
+unsigned long long size;
+
+capability->nSgxSections = virJSONValueArraySize(sgxSections);
+capability->sgxSections = g_new0(virSGXSection, 
capability->nSgxSections);
+
+for (i = 0; i < capability->nSgxSections; i++) {
+virJSONValue *elem = virJSONValueArrayGet(sgxSections, i);
+
+if (virJSONValueObjectGetNumberUlong(elem, "size", ) < 0) {
+

[libvirt][PATCH v17 6/9] qemu_cgroup: Allow SGX in devices controller

2022-11-10 Thread Lin Yang
From: Michal Privoznik 

SGX memory backend needs to access /dev/sgx_vepc (which allows
userspace to allocate "raw" EPC without an associated enclave)
and /dev/sgx_provision (which allows creating provisioning
enclaves). Allow these two devices in CGroups if a domain is
configured so.

Signed-off-by: Michal Privoznik 
Signed-off-by: Haibin Huang 
---
 src/qemu/qemu_cgroup.c | 78 +++---
 src/qemu/qemu_domain.h |  2 ++
 2 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index d6f27a5a4d..9cd4bf8b98 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -120,6 +120,28 @@ qemuCgroupDenyDevicePath(virDomainObj *vm,
 }
 
 
+static int
+qemuCgroupDenyDevicesPaths(virDomainObj *vm,
+   const char *const *paths,
+   int perms,
+   bool ignoreEacces)
+{
+size_t i;
+
+for (i = 0; paths[i] != NULL; i++) {
+if (!virFileExists(paths[i])) {
+VIR_DEBUG("Ignoring non-existent device %s", paths[i]);
+continue;
+}
+
+if (qemuCgroupDenyDevicePath(vm, paths[i], perms, ignoreEacces) < 0)
+return -1;
+}
+
+return 0;
+}
+
+
 static int
 qemuSetupImagePathCgroup(virDomainObj *vm,
  const char *path,
@@ -520,16 +542,32 @@ qemuSetupMemoryDevicesCgroup(virDomainObj *vm,
  virDomainMemoryDef *mem)
 {
 qemuDomainObjPrivate *priv = vm->privateData;
-
-if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
-mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM)
-return 0;
+const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
+ QEMU_DEV_SGX_PROVISION, NULL };
 
 if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
 return 0;
 
-return qemuCgroupAllowDevicePath(vm, mem->nvdimmPath,
- VIR_CGROUP_DEVICE_RW, false);
+switch (mem->model) {
+case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+if (qemuCgroupAllowDevicePath(vm, mem->nvdimmPath,
+  VIR_CGROUP_DEVICE_RW, false) < 0)
+return -1;
+break;
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+if (qemuCgroupAllowDevicesPaths(vm, sgxPaths,
+VIR_CGROUP_DEVICE_RW, false) < 0)
+return -1;
+break;
+case VIR_DOMAIN_MEMORY_MODEL_NONE:
+case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
+case VIR_DOMAIN_MEMORY_MODEL_LAST:
+break;
+}
+
+return 0;
 }
 
 
@@ -538,16 +576,32 @@ qemuTeardownMemoryDevicesCgroup(virDomainObj *vm,
 virDomainMemoryDef *mem)
 {
 qemuDomainObjPrivate *priv = vm->privateData;
-
-if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM &&
-mem->model != VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM)
-return 0;
+const char *const sgxPaths[] = { QEMU_DEV_SGX_VEPVC,
+QEMU_DEV_SGX_PROVISION, NULL };
 
 if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
 return 0;
 
-return qemuCgroupDenyDevicePath(vm, mem->nvdimmPath,
-VIR_CGROUP_DEVICE_RWM, false);
+switch (mem->model) {
+case VIR_DOMAIN_MEMORY_MODEL_NVDIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_PMEM:
+if (qemuCgroupDenyDevicePath(vm, mem->nvdimmPath,
+ VIR_CGROUP_DEVICE_RWM, false) < 0)
+return -1;
+break;
+case VIR_DOMAIN_MEMORY_MODEL_SGX_EPC:
+if (qemuCgroupDenyDevicesPaths(vm, sgxPaths,
+   VIR_CGROUP_DEVICE_RW, false) < 0)
+return -1;
+break;
+case VIR_DOMAIN_MEMORY_MODEL_NONE:
+case VIR_DOMAIN_MEMORY_MODEL_DIMM:
+case VIR_DOMAIN_MEMORY_MODEL_VIRTIO_MEM:
+case VIR_DOMAIN_MEMORY_MODEL_LAST:
+break;
+}
+
+return 0;
 }
 
 
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 7950c4c2da..d5f4fbad12 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -81,6 +81,8 @@ struct _qemuDomainUnpluggingDevice {
 #define QEMU_DEVPREFIX "/dev/"
 #define QEMU_DEV_VFIO "/dev/vfio/vfio"
 #define QEMU_DEV_SEV "/dev/sev"
+#define QEMU_DEV_SGX_VEPVC "/dev/sgx_vepc"
+#define QEMU_DEV_SGX_PROVISION "/dev/sgx_provision"
 #define QEMU_DEVICE_MAPPER_CONTROL_PATH "/dev/mapper/control"
 
 
-- 
2.25.1



[libvirt][PATCH v17 3/9] Convert QMP capabilities to domain capabilities

2022-11-10 Thread Lin Yang
From: Haibin Huang 

the QMP capabilities:
  {"return":
{
  "sgx": true,
  "section-size": 1024,
  "flc": true
}
  }

the domain capabilities:
  
yes
1
  

Signed-off-by: Michal Privoznik 
Signed-off-by: Haibin Huang 
---
 src/qemu/qemu_capabilities.c  | 204 ++
 src/qemu/qemu_capabilities.h  |   4 +
 .../caps_6.2.0.x86_64.replies |  21 +-
 .../caps_7.0.0.x86_64.replies |  34 ++-
 .../caps_7.0.0.x86_64.xml |  11 +
 .../caps_7.1.0.x86_64.replies |  21 +-
 .../caps_7.2.0.x86_64.replies |  21 +-
 7 files changed, 300 insertions(+), 16 deletions(-)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index a2031e9aaa..7c9d7ed885 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -677,6 +677,7 @@ VIR_ENUM_IMPL(virQEMUCaps,
   /* 435 */
   "query-stats", /* QEMU_CAPS_QUERY_STATS */
   "query-stats-schemas", /* QEMU_CAPS_QUERY_STATS_SCHEMAS */
+  "sgx-epc", /* QEMU_CAPS_SGX_EPC */
 );
 
 
@@ -758,6 +759,8 @@ struct _virQEMUCaps {
 
 virSEVCapability *sevCapabilities;
 
+virSGXCapability *sgxCapabilities;
+
 /* Capabilities which may differ depending on the accelerator. */
 virQEMUCapsAccel kvm;
 virQEMUCapsAccel hvf;
@@ -1380,6 +1383,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = {
 { "s390-pv-guest", QEMU_CAPS_S390_PV_GUEST },
 { "virtio-mem-pci", QEMU_CAPS_DEVICE_VIRTIO_MEM_PCI },
 { "virtio-iommu-pci", QEMU_CAPS_DEVICE_VIRTIO_IOMMU_PCI },
+{ "sgx-epc", QEMU_CAPS_SGX_EPC },
 };
 
 
@@ -1891,6 +1895,36 @@ virQEMUCapsSEVInfoCopy(virSEVCapability **dst,
 }
 
 
+static int
+virQEMUCapsSGXInfoCopy(virSGXCapability **dst,
+   virSGXCapability *src)
+{
+g_autoptr(virSGXCapability) tmp = NULL;
+
+if (!src) {
+*dst = NULL;
+return 0;
+}
+
+tmp = g_new0(virSGXCapability, 1);
+
+tmp->flc = src->flc;
+tmp->sgx1 = src->sgx1;
+tmp->sgx2 = src->sgx2;
+tmp->section_size = src->section_size;
+
+if (src->nSgxSections > 0) {
+tmp->sgxSections = g_new0(virSGXSection, src->nSgxSections);
+memcpy(tmp->sgxSections, src->sgxSections,
+   src->nSgxSections * sizeof(*tmp->sgxSections));
+tmp->nSgxSections = src->nSgxSections;
+}
+
+*dst = g_steal_pointer();
+return 0;
+}
+
+
 static void
 virQEMUCapsAccelCopyMachineTypes(virQEMUCapsAccel *dst,
  virQEMUCapsAccel *src)
@@ -1972,6 +2006,12 @@ virQEMUCaps *virQEMUCapsNewCopy(virQEMUCaps *qemuCaps)
qemuCaps->sevCapabilities) < 0)
 return NULL;
 
+
+if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_SGX_EPC) &&
+virQEMUCapsSGXInfoCopy(>sgxCapabilities,
+   qemuCaps->sgxCapabilities) < 0)
+return NULL;
+
 return g_steal_pointer();
 }
 
@@ -2010,6 +2050,7 @@ void virQEMUCapsDispose(void *obj)
 virCPUDataFree(qemuCaps->cpuData);
 
 virSEVCapabilitiesFree(qemuCaps->sevCapabilities);
+virSGXCapabilitiesFree(qemuCaps->sgxCapabilities);
 
 virQEMUCapsAccelClear(>kvm);
 virQEMUCapsAccelClear(>hvf);
@@ -2541,6 +2582,13 @@ virQEMUCapsGetSEVCapabilities(virQEMUCaps *qemuCaps)
 }
 
 
+virSGXCapability *
+virQEMUCapsGetSGXCapabilities(virQEMUCaps *qemuCaps)
+{
+return qemuCaps->sgxCapabilities;
+}
+
+
 static int
 virQEMUCapsProbeQMPCommands(virQEMUCaps *qemuCaps,
 qemuMonitor *mon)
@@ -3373,6 +3421,31 @@ virQEMUCapsProbeQMPSEVCapabilities(virQEMUCaps *qemuCaps,
 }
 
 
+static int
+virQEMUCapsProbeQMPSGXCapabilities(virQEMUCaps *qemuCaps,
+   qemuMonitor *mon)
+{
+int rc = -1;
+virSGXCapability *caps = NULL;
+
+if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SGX_EPC))
+return 0;
+
+if ((rc = qemuMonitorGetSGXCapabilities(mon, )) < 0)
+return -1;
+
+/* SGX isn't actually supported */
+if (rc == 0) {
+virQEMUCapsClear(qemuCaps, QEMU_CAPS_SGX_EPC);
+return 0;
+}
+
+virSGXCapabilitiesFree(qemuCaps->sgxCapabilities);
+qemuCaps->sgxCapabilities = caps;
+return 0;
+}
+
+
 /*
  * Filter for features which should never be passed to QEMU. Either because
  * QEMU never supported them or they were dropped as they never did anything
@@ -4167,6 +4240,97 @@ virQEMUCapsParseSEVInfo(virQEMUCaps *qemuCaps, 
xmlXPathContextPtr ctxt)
 }
 
 
+static int
+virQEMUCapsParseSGXInfo(virQEMUCaps *qemuCaps,
+xmlXPathContextPtr ctxt)
+{
+g_autoptr(virSGXCapability) sgx = NULL;
+xmlNodePtr sgxSections = NULL;
+g_autofree char *flc = NULL;
+g_autofree char *sgx1 = NULL;
+g_autofree char *sgx2 = NULL;
+
+if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_SGX_EPC))
+return 0;
+
+if 

[libvirt][PATCH v17 1/9] domain_capabilities: Define SGX capabilities structs

2022-11-10 Thread Lin Yang
From: Haibin Huang 

Signed-off-by: Michal Privoznik 
Reviewed-by: Peter Krempa 
Signed-off-by: Haibin Huang 
---
 src/conf/domain_capabilities.c | 11 +++
 src/conf/domain_capabilities.h | 22 ++
 src/libvirt_private.syms   |  1 +
 3 files changed, 34 insertions(+)

diff --git a/src/conf/domain_capabilities.c b/src/conf/domain_capabilities.c
index a7f256e4ec..daeaee3c5c 100644
--- a/src/conf/domain_capabilities.c
+++ b/src/conf/domain_capabilities.c
@@ -76,6 +76,17 @@ virSEVCapabilitiesFree(virSEVCapability *cap)
 }
 
 
+void
+virSGXCapabilitiesFree(virSGXCapability *cap)
+{
+if (!cap)
+return;
+
+g_free(cap->sgxSections);
+g_free(cap);
+}
+
+
 static void
 virDomainCapsDispose(void *obj)
 {
diff --git a/src/conf/domain_capabilities.h b/src/conf/domain_capabilities.h
index e0cfa75531..1d504a3506 100644
--- a/src/conf/domain_capabilities.h
+++ b/src/conf/domain_capabilities.h
@@ -208,6 +208,22 @@ struct _virSEVCapability {
 unsigned int max_es_guests;
 };
 
+typedef struct _virSGXSection virSGXSection;
+struct _virSGXSection {
+unsigned long long size;
+unsigned int node;
+};
+
+typedef struct _virSGXCapability virSGXCapability;
+struct _virSGXCapability {
+bool flc;
+bool sgx1;
+bool sgx2;
+unsigned long long section_size;
+size_t nSgxSections;
+virSGXSection *sgxSections;
+};
+
 typedef enum {
 VIR_DOMAIN_CAPS_FEATURE_IOTHREADS = 0,
 VIR_DOMAIN_CAPS_FEATURE_VMCOREINFO,
@@ -246,6 +262,7 @@ struct _virDomainCaps {
 
 virDomainCapsFeatureGIC gic;
 virSEVCapability *sev;
+virSGXCapability *sgx;
 /* add new domain features here */
 
 virTristateBool features[VIR_DOMAIN_CAPS_FEATURE_LAST];
@@ -296,3 +313,8 @@ void
 virSEVCapabilitiesFree(virSEVCapability *capabilities);
 
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virSEVCapability, virSEVCapabilitiesFree);
+
+void
+virSGXCapabilitiesFree(virSGXCapability *capabilities);
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(virSGXCapability, virSGXCapabilitiesFree);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 97ff2a43e4..ebd7bc61a8 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -218,6 +218,7 @@ virDomainCapsEnumSet;
 virDomainCapsFormat;
 virDomainCapsNew;
 virSEVCapabilitiesFree;
+virSGXCapabilitiesFree;
 
 
 # conf/domain_conf.h
-- 
2.25.1



[libvirt][PATCH v17 0/9] Support query and use SGX

2022-11-10 Thread Lin Yang
Diff to v16:
* Included SGX EPC in the calculation and validation of maximum
  memory space in qemuDomainDefValidateMemoryHotplug. Removed
  all hacking in this function, but only skip
  qemuDomainDefValidateMemoryHotplugDevice validation for SGX EPC,
  since it is not hotpluggable.
* Added SGX fields in new QEMU 7.2 domaincaps xml.

Haibin Huang (4):
  domain_capabilities: Define SGX capabilities structs
  qemu: Get SGX capabilities form QMP
  Convert QMP capabilities to domain capabilities
  conf: expose SGX feature in domain capabilities

Lin Yang (2):
  conf: Introduce SGX EPC element into device memory xml
  qemu: Add command-line to generate SGX EPC memory backend

Michal Prívozník (3):
  qemu_cgroup: Allow SGX in devices controller
  qemu_namespace: Create SGX related nodes in domain's namespace
  security_dac: Set DAC label on SGX /dev nodes

 docs/formatdomain.rst |  25 +-
 docs/formatdomaincaps.rst |  40 
 src/conf/domain_capabilities.c|  47 
 src/conf/domain_capabilities.h|  22 ++
 src/conf/domain_conf.c|  30 +++
 src/conf/domain_conf.h|   1 +
 src/conf/domain_postparse.c   |   1 +
 src/conf/domain_validate.c|   9 +
 src/conf/schemas/domaincaps.rng   |  43 
 src/conf/schemas/domaincommon.rng |   1 +
 src/libvirt_private.syms  |   1 +
 src/qemu/qemu_alias.c |   6 +-
 src/qemu/qemu_capabilities.c  | 220 ++
 src/qemu/qemu_capabilities.h  |   4 +
 src/qemu/qemu_cgroup.c|  78 ++-
 src/qemu/qemu_command.c   |  66 +-
 src/qemu/qemu_domain.c|  28 ++-
 src/qemu/qemu_domain.h|   2 +
 src/qemu/qemu_domain_address.c|   6 +
 src/qemu/qemu_driver.c|   1 +
 src/qemu/qemu_monitor.c   |  10 +
 src/qemu/qemu_monitor.h   |   3 +
 src/qemu/qemu_monitor_json.c  | 154 +++-
 src/qemu/qemu_monitor_json.h  |   4 +
 src/qemu/qemu_namespace.c |  20 +-
 src/qemu/qemu_process.c   |   2 +
 src/qemu/qemu_validate.c  |  40 
 src/security/security_apparmor.c  |   1 +
 src/security/security_dac.c   |  46 ++--
 src/security/security_selinux.c   |   2 +
 tests/domaincapsdata/bhyve_basic.x86_64.xml   |   1 +
 tests/domaincapsdata/bhyve_fbuf.x86_64.xml|   1 +
 tests/domaincapsdata/bhyve_uefi.x86_64.xml|   1 +
 tests/domaincapsdata/empty.xml|   1 +
 tests/domaincapsdata/libxl-xenfv.xml  |   1 +
 tests/domaincapsdata/libxl-xenpv.xml  |   1 +
 .../domaincapsdata/qemu_4.2.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_4.2.0-tcg.x86_64.xml  |   1 +
 .../qemu_4.2.0-virt.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_4.2.0.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_4.2.0.ppc64.xml |   1 +
 tests/domaincapsdata/qemu_4.2.0.s390x.xml |   1 +
 tests/domaincapsdata/qemu_4.2.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_5.0.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_5.0.0-tcg.x86_64.xml  |   1 +
 .../qemu_5.0.0-virt.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_5.0.0.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_5.0.0.ppc64.xml |   1 +
 tests/domaincapsdata/qemu_5.0.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_5.1.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_5.1.0-tcg.x86_64.xml  |   1 +
 tests/domaincapsdata/qemu_5.1.0.sparc.xml |   1 +
 tests/domaincapsdata/qemu_5.1.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_5.2.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_5.2.0-tcg.x86_64.xml  |   1 +
 .../qemu_5.2.0-virt.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_5.2.0.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_5.2.0.ppc64.xml |   1 +
 tests/domaincapsdata/qemu_5.2.0.s390x.xml |   1 +
 tests/domaincapsdata/qemu_5.2.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_6.0.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_6.0.0-tcg.x86_64.xml  |   1 +
 .../qemu_6.0.0-virt.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_6.0.0.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_6.0.0.s390x.xml |   1 +
 tests/domaincapsdata/qemu_6.0.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_6.1.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_6.1.0-tcg.x86_64.xml  |   1 +
 tests/domaincapsdata/qemu_6.1.0.x86_64.xml|   1 +
 .../domaincapsdata/qemu_6.2.0-q35.x86_64.xml  |   1 +
 .../domaincapsdata/qemu_6.2.0-tcg.x86_64.xml  |   1 +
 .../qemu_6.2.0-virt.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_6.2.0.aarch64.xml   |   1 +
 tests/domaincapsdata/qemu_6.2.0.ppc64.xml |   1 +
 

Re: [libvirt][PATCH v16 5/9] conf: Introduce SGX EPC element into device memory xml

2022-11-10 Thread Yang, Lin A
On 11/9/22, 11:37 PM, "Peter Krempa"  wrote:
> On Thu, Nov 10, 2022 at 06:09:46 +, Yang, Lin A wrote:
> > On 11/8/22, 5:16 AM, "Peter Krempa"  wrote:
> > > On Tue, Nov 08, 2022 at 12:25:26 +, Daniel P. Berrangé wrote:
> > > > On Fri, Oct 14, 2022 at 01:12:28PM +0200, Michal Prívozník wrote:
> > > > > On 10/8/22 06:00, Lin Yang wrote:
> > >
> > > [...]
> > >
> > > > >
> > > > > # ./qemu-system-x86_64 -S -nographic -nodefaults -m 128 \
> > > > > -machine pc,sgx-epc.0.memdev=memepc0,sgx-epc.0.node=0 \
> > > > > -object 
> > > > > '{"qom-type":"memory-backend-epc","id":"memepc0","prealloc":true,"size":67108864,"host-nodes":[0],"policy":"bind"}'
> > > > >  \
> > > > > -monitor stdio
> > > > > QEMU 7.1.50 monitor - type 'help' for more information
> > > > > (qemu) info memory-devices
> > > > > Memory device [sgx-epc]: ""
> > > > >   memaddr: 0x1
> > > > >   size: 67108864
> > > > >   node: 0
> > > > >   memdev: /objects/memepc0
> > > > > (qemu) info memory_size_summary
> > > > > base memory: 134217728
> > > > > plugged memory: 0
> > > > > (qemu)
> > > >
> > > > I'm not sure this check is showing us the truth.
> > > >
> > > > In backends/hostmem-epc.c, sgx_epc_backend_memory_alloc is
> > > > opening /dev/sgx_vepc and mmap()ing the requested size from
> > > > that file. IOW that's clearly in addition to whatever has
> > > > been mapped as the main RAM.
> > > >
> > > > In hw/i386/sgx-epc.c, sgx_epc_md_get_plugged_size is  hardcoded
> > > > to always return 0, which is why 'plugged memory' is reported
> > > > as zero above. I don't know what it is reporting zero.
> > > >
> > > > Is this because the SGX RAM is not accessible to the guest OS
> > > > as "normal" RAM perhaps, and thus to be reported differently.
> > >
> > > So even if the memory is not accessible as normal RAM, but still is
> > > usable by the guest OS, the use of an  element is okay,
> > > but the total memory size of a VM should account for it.
> > >
> > > So in fact all the hacks which exclude it from the total memory size
> > > should be removed.
> >
> > QEMU reports zero 'plugged memory' for SGX RAM because it will not
> > add more normal RAM to guest OS and it cannot be hotplug/unplug on
> > the fly, since it is realized through CPUID and has to be initialized before
> > vcpu.
> >
> > One the host, BIOS reserves part of RAM as SGX RAM, which cannot be
> > directly accessed by other application or kernel. The host OS only
> > see RAM size = physical memory size – SGX RAM.
> >
> > Does “total memory size” here means all memory device size, like all
> > physical memory size on host, not normal RAM size? If not, it might
> > give confusing info to user since guset OS shows a different normal RAM
> > size.
>
> Libvirt's total memory size must include any memory usable by the guest
> OS regardless of how it is made available to it and regardless of
> whether un-modified OS is able to use it. Same applies e.g. to
> virtio-mem which requires a guest driver.
>
> Similarly the EPC memory most likely is provided to the guest via normal
> memory mapping (If not make sure to explicitly point it out next time.)
> and thus also must be included in the calculation and validation of
> maximum memory address space. This calculation/validation is done in
> qemuDomainDefValidateMemoryHotplug and thus is correct and EPC must be
> included in it. The funciton and variable naming may be confusing so
> you can change it, but functionally it seems it must be included in the
> calculation.

Thank you so much for explanation in details.

True, EPC is mapped to separate memory region in guest via mmap(). It’s
clear to me now. Let me quickly resolve it in next patch.

Regards,
Lin.


[PATCH 7/8] network: firewalld: add policies for NAT networks

2022-11-10 Thread Eric Garver
Signed-off-by: Eric Garver 
---
 libvirt.spec.in|  1 +
 src/network/libvirt-nat-out.policy | 13 +
 src/network/libvirt-to-host.policy |  1 +
 src/network/meson.build|  5 +
 4 files changed, 20 insertions(+)
 create mode 100644 src/network/libvirt-nat-out.policy

diff --git a/libvirt.spec.in b/libvirt.spec.in
index 6537b9385a0e..6a852d726e55 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -1922,6 +1922,7 @@ exit 0
 %{_prefix}/lib/firewalld/zones/libvirt.xml
 %{_prefix}/lib/firewalld/zones/libvirt-nat.xml
 %{_prefix}/lib/firewalld/zones/libvirt-routed.xml
+%{_prefix}/lib/firewalld/policies/libvirt-nat-out.xml
 %{_prefix}/lib/firewalld/policies/libvirt-routed-in.xml
 %{_prefix}/lib/firewalld/policies/libvirt-routed-out.xml
 %{_prefix}/lib/firewalld/policies/libvirt-to-host.xml
diff --git a/src/network/libvirt-nat-out.policy 
b/src/network/libvirt-nat-out.policy
new file mode 100644
index ..ed19be90c751
--- /dev/null
+++ b/src/network/libvirt-nat-out.policy
@@ -0,0 +1,13 @@
+
+
+  libvirt-nat-out
+
+  
+This policy is used to allow NAT virtual machine traffic to the rest of
+the network.
+  
+
+  
+  
+  
+
diff --git a/src/network/libvirt-to-host.policy 
b/src/network/libvirt-to-host.policy
index b20aecaf4249..a22952ea1c95 100644
--- a/src/network/libvirt-to-host.policy
+++ b/src/network/libvirt-to-host.policy
@@ -7,6 +7,7 @@
 host.
   
 
+  
   
   
 
diff --git a/src/network/meson.build b/src/network/meson.build
index fa18cbb8ff62..34f336fa222e 100644
--- a/src/network/meson.build
+++ b/src/network/meson.build
@@ -116,6 +116,11 @@ if conf.has('WITH_NETWORK')
   install_dir: prefix / 'lib' / 'firewalld' / 'policies',
   rename: [ 'libvirt-to-host.xml' ],
 )
+install_data(
+  'libvirt-nat-out.policy',
+  install_dir: prefix / 'lib' / 'firewalld' / 'policies',
+  rename: [ 'libvirt-nat-out.xml' ],
+)
 install_data(
   'libvirt-routed-out.policy',
   install_dir: prefix / 'lib' / 'firewalld' / 'policies',
-- 
2.37.3



[PATCH 3/8] network: firewalld: use native routed networks

2022-11-10 Thread Eric Garver
The firewalld backend for routed networks can now use a native
implementation. The hybrid of iptables + firewalld is no longer
necessary. When full native firewalld is in use there are zero iptables
rules add by libvirt.

This is accomplished by returning early in networkAddFirewallRules() and
avoiding calls to networkSetupPrivateChains().

Signed-off-by: Eric Garver 
---
 src/network/bridge_driver_linux.c | 51 +--
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/network/bridge_driver_linux.c 
b/src/network/bridge_driver_linux.c
index 88a8e9c5fa27..42f098ff1f9b 100644
--- a/src/network/bridge_driver_linux.c
+++ b/src/network/bridge_driver_linux.c
@@ -133,6 +133,21 @@ networkHasRunningNetworksWithFW(virNetworkDriverState 
*driver)
 }
 
 
+static bool
+networkUseOnlyFirewallDRules(void)
+{
+if (virFirewallDIsRegistered() < 0)
+return false;
+
+if (virFirewallDPolicyExists("libvirt-routed-out") &&
+virFirewallDZoneExists("libvirt-routed")) {
+return true;
+}
+
+return false;
+}
+
+
 void
 networkPreReloadFirewallRules(virNetworkDriverState *driver,
   bool startup G_GNUC_UNUSED,
@@ -172,6 +187,9 @@ networkPreReloadFirewallRules(virNetworkDriverState *driver,
 return;
 }
 
+if (!chainInitDone && networkUseOnlyFirewallDRules())
+return;
+
 ignore_value(virOnce(, networkSetupPrivateChains));
 }
 }
@@ -801,6 +819,18 @@ networkRemoveIPSpecificFirewallRules(virFirewall *fw,
 }
 
 
+static int
+networkAddOnlyFirewallDRules(virNetworkDef *def)
+{
+if (def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
+if (virFirewallDInterfaceSetZone(def->bridge, "libvirt-routed") < 0)
+return -1;
+}
+
+return 0;
+}
+
+
 static int
 networkAddHybridFirewallDRules(virNetworkDef *def)
 {
@@ -860,6 +890,11 @@ int networkAddFirewallRules(virNetworkDef *def)
 virNetworkIPDef *ipdef;
 g_autoptr(virFirewall) fw = virFirewallNew();
 
+if (!def->bridgeZone && networkUseOnlyFirewallDRules() &&
+def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
+return networkAddOnlyFirewallDRules(def);
+}
+
 if (virOnce(, networkSetupPrivateChains) < 0)
 return -1;
 
@@ -895,15 +930,8 @@ int networkAddFirewallRules(virNetworkDef *def)
 return -1;
 
 } else if (virFirewallDIsRegistered() == 0) {
-if (def->forward.type == VIR_NETWORK_FORWARD_ROUTE &&
-virFirewallDPolicyExists("libvirt-routed-out") &&
-virFirewallDZoneExists("libvirt-routed")) {
-if (virFirewallDInterfaceSetZone(def->bridge, "libvirt-routed") < 
0)
-return -1;
-} else {
-if (networkAddHybridFirewallDRules(def) < 0)
-return -1;
-}
+if (networkAddHybridFirewallDRules(def) < 0)
+return -1;
 }
 
 virFirewallStartTransaction(fw, 0);
@@ -940,6 +968,11 @@ void networkRemoveFirewallRules(virNetworkDef *def)
 virNetworkIPDef *ipdef;
 g_autoptr(virFirewall) fw = virFirewallNew();
 
+if (!def->bridgeZone && networkUseOnlyFirewallDRules() &&
+def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
+return;
+}
+
 virFirewallStartTransaction(fw, VIR_FIREWALL_TRANSACTION_IGNORE_ERRORS);
 networkRemoveChecksumFirewallRules(fw, def);
 
-- 
2.37.3



[PATCH 6/8] network: firewalld: add zone for NAT networks

2022-11-10 Thread Eric Garver
This zone will be used for the NAT network by default.

Note that this zone definition omits "forward" aka intra-zone
forwarding, because it requires firewalld >= 0.9.0.

Signed-off-by: Eric Garver 
---
 libvirt.spec.in  |  1 +
 src/network/libvirt-nat.zone | 10 ++
 src/network/meson.build  |  5 +
 3 files changed, 16 insertions(+)
 create mode 100644 src/network/libvirt-nat.zone

diff --git a/libvirt.spec.in b/libvirt.spec.in
index ac5bf7b8653c..6537b9385a0e 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -1920,6 +1920,7 @@ exit 0
 
 %if %{with_firewalld_zone}
 %{_prefix}/lib/firewalld/zones/libvirt.xml
+%{_prefix}/lib/firewalld/zones/libvirt-nat.xml
 %{_prefix}/lib/firewalld/zones/libvirt-routed.xml
 %{_prefix}/lib/firewalld/policies/libvirt-routed-in.xml
 %{_prefix}/lib/firewalld/policies/libvirt-routed-out.xml
diff --git a/src/network/libvirt-nat.zone b/src/network/libvirt-nat.zone
new file mode 100644
index ..6ebffb189a56
--- /dev/null
+++ b/src/network/libvirt-nat.zone
@@ -0,0 +1,10 @@
+
+
+  libvirt-nat
+
+  
+This zone is intended to be used only by NAT libvirt virtual networks -
+libvirt will add the bridge devices for all new virtual networks to this
+zone by default.
+  
+
diff --git a/src/network/meson.build b/src/network/meson.build
index d266bb225a64..fa18cbb8ff62 100644
--- a/src/network/meson.build
+++ b/src/network/meson.build
@@ -101,6 +101,11 @@ if conf.has('WITH_NETWORK')
   install_dir: prefix / 'lib' / 'firewalld' / 'zones',
   rename: [ 'libvirt.xml' ],
 )
+install_data(
+  'libvirt-nat.zone',
+  install_dir: prefix / 'lib' / 'firewalld' / 'zones',
+  rename: [ 'libvirt-nat.xml' ],
+)
 install_data(
   'libvirt-routed.zone',
   install_dir: prefix / 'lib' / 'firewalld' / 'zones',
-- 
2.37.3



[PATCH 5/8] util: add virFirewallDApplyPolicyRichRules()

2022-11-10 Thread Eric Garver
Signed-off-by: Eric Garver 
---
 src/libvirt_private.syms |  1 +
 src/util/virfirewalld.c  | 44 
 src/util/virfirewalld.h  |  4 
 3 files changed, 49 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index c5882c535210..8fddb9aad11b 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2358,6 +2358,7 @@ virFirewallStartTransaction;
 
 
 # util/virfirewalld.h
+virFirewallDApplyPolicyRichRules;
 virFirewallDApplyRule;
 virFirewallDGetBackend;
 virFirewallDGetPolicies;
diff --git a/src/util/virfirewalld.c b/src/util/virfirewalld.c
index 07f9cdd1e485..9b3c1d84c48f 100644
--- a/src/util/virfirewalld.c
+++ b/src/util/virfirewalld.c
@@ -426,6 +426,50 @@ virFirewallDApplyRule(virFirewallLayer layer,
 return 0;
 }
 
+/**
+ * virFirewallDApplyPolicyRichRules:
+ * @policy: which policy to apply rules to
+ * @rules:  rules to apply, array of strings
+ * @rules_count:number of rules in rules array
+ *
+ * Returns 0 on success, non-zero on failure
+ */
+int
+virFirewallDApplyPolicyRichRules(const char *policy,
+ const char **rules,
+ size_t rules_count)
+{
+GDBusConnection *sysbus = virGDBusGetSystemBus();
+g_autoptr(GVariant) message = NULL;
+GVariant *array = NULL;
+GVariantBuilder builder;
+size_t i;
+
+if (!sysbus)
+return -1;
+
+g_variant_builder_init(, G_VARIANT_TYPE_STRING_ARRAY);
+for (i = 0; i < rules_count; i++) {
+g_variant_builder_add(, "s", rules[i]);
+}
+array = g_variant_builder_end();
+
+g_variant_builder_init(, G_VARIANT_TYPE_VARDICT);
+g_variant_builder_add(, "{sv}", "rich_rules", array);
+
+message = g_variant_new("(sa{sv})", policy, );
+
+return virGDBusCallMethod(sysbus,
+ NULL,
+ NULL,
+ NULL,
+ VIR_FIREWALL_FIREWALLD_SERVICE,
+ "/org/fedoraproject/FirewallD1",
+ "org.fedoraproject.FirewallD1.policy",
+ "setPolicySettings",
+ message);
+}
+
 
 int
 virFirewallDInterfaceSetZone(const char *iface,
diff --git a/src/util/virfirewalld.h b/src/util/virfirewalld.h
index 11aad7786dfb..9ff4e02e1d59 100644
--- a/src/util/virfirewalld.h
+++ b/src/util/virfirewalld.h
@@ -40,6 +40,10 @@ int virFirewallDApplyRule(virFirewallLayer layer,
   char **args, size_t argsLen,
   bool ignoreErrors,
   char **output);
+int virFirewallDApplyPolicyRichRules(const char *policy,
+ const char **rules,
+ size_t rules_count);
+
 
 int virFirewallDInterfaceSetZone(const char *iface,
  const char *zone);
-- 
2.37.3



[PATCH 2/8] network: firewalld: add networkAddHybridFirewallDRules()

2022-11-10 Thread Eric Garver
This factors out the firewalld pieces of the iptables + firewalld
backend.

Signed-off-by: Eric Garver 
---
 src/network/bridge_driver_linux.c | 117 --
 1 file changed, 61 insertions(+), 56 deletions(-)

diff --git a/src/network/bridge_driver_linux.c 
b/src/network/bridge_driver_linux.c
index d9597d91beed..88a8e9c5fa27 100644
--- a/src/network/bridge_driver_linux.c
+++ b/src/network/bridge_driver_linux.c
@@ -801,6 +801,58 @@ networkRemoveIPSpecificFirewallRules(virFirewall *fw,
 }
 
 
+static int
+networkAddHybridFirewallDRules(virNetworkDef *def)
+{
+/* if firewalld is active, try to set the "libvirt" zone. This is
+ * desirable (for consistency) if firewalld is using the iptables
+ * backend, but is necessary (for basic network connectivity) if
+ * firewalld is using the nftables backend
+ */
+
+/* if the "libvirt" zone exists, then set it. If not, and
+ * if firewalld is using the nftables backend, then we
+ * need to log an error because the combination of
+ * nftables + default zone means that traffic cannot be
+ * forwarded (and even DHCP and DNS from guest to host
+ * will probably no be permitted by the default zone
+ */
+if (virFirewallDZoneExists("libvirt")) {
+if (virFirewallDInterfaceSetZone(def->bridge, "libvirt") < 0)
+return -1;
+} else {
+unsigned long version;
+int vresult = virFirewallDGetVersion();
+
+if (vresult < 0)
+return -1;
+
+/* Support for nftables backend was added in firewalld
+ * 0.6.0. Support for rule priorities (required by the
+ * 'libvirt' zone, which should be installed by a
+ * libvirt package, *not* by firewalld) was not added
+ * until firewalld 0.7.0 (unless it was backported).
+ */
+if (version >= 6000 &&
+virFirewallDGetBackend() == VIR_FIREWALLD_BACKEND_NFTABLES) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("firewalld is set to use the nftables "
+ "backend, but the required firewalld "
+ "'libvirt' zone is missing. Either set "
+ "the firewalld backend to 'iptables', or "
+ "ensure that firewalld has a 'libvirt' "
+ "zone by upgrading firewalld to a "
+ "version supporting rule priorities "
+ "(0.7.0+) and/or rebuilding "
+ "libvirt with --with-firewalld-zone"));
+return -1;
+}
+}
+
+return 0;
+}
+
+
 /* Add all rules for all ip addresses (and general rules) on a network */
 int networkAddFirewallRules(virNetworkDef *def)
 {
@@ -842,62 +894,15 @@ int networkAddFirewallRules(virNetworkDef *def)
 if (virFirewallDInterfaceSetZone(def->bridge, def->bridgeZone) < 0)
 return -1;
 
-} else {
-
-/* if firewalld is active, try to set the "libvirt" zone. This is
- * desirable (for consistency) if firewalld is using the iptables
- * backend, but is necessary (for basic network connectivity) if
- * firewalld is using the nftables backend
- */
-if (virFirewallDIsRegistered() == 0) {
-
-/* if the "libvirt" zone exists, then set it. If not, and
- * if firewalld is using the nftables backend, then we
- * need to log an error because the combination of
- * nftables + default zone means that traffic cannot be
- * forwarded (and even DHCP and DNS from guest to host
- * will probably no be permitted by the default zone
- *
- * Routed networks use a different zone and policy which we also
- * need to verify exist. Probing for the policy guarantees the
- * running firewalld has support for policies (firewalld >= 0.9.0).
- */
-if (def->forward.type == VIR_NETWORK_FORWARD_ROUTE &&
-virFirewallDPolicyExists("libvirt-routed-out") &&
-virFirewallDZoneExists("libvirt-routed")) {
-if (virFirewallDInterfaceSetZone(def->bridge, 
"libvirt-routed") < 0)
-return -1;
-} else if (virFirewallDZoneExists("libvirt")) {
-if (virFirewallDInterfaceSetZone(def->bridge, "libvirt") < 0)
-return -1;
-} else {
-unsigned long version;
-int vresult = virFirewallDGetVersion();
-
-if (vresult < 0)
-return -1;
-
-/* Support for nftables backend was added in firewalld
- * 0.6.0. Support for rule priorities (required by the
- * 'libvirt' zone, which should be installed by a
- * libvirt package, *not* by firewalld) was not added
- * until 

[PATCH 0/8] network: firewalld: native support for NAT/routed

2022-11-10 Thread Eric Garver
This series further improves the firewalld backend by converting to a
fully native implementation for NAT and routed networks. That is, there
are no iptables rules added by libvirt when the running firewalld is
0.9.0 or later.

The major advantage is that firewalld users can use firewall-cmd to
filter the VM traffic and apply their own policies.

When firewalld < 0.9.0 is present only the "libvirt" zone will be used.
The new "libvirt-nat" and "libvirt-routed" zones are not used. This
maintains compatibility for older distributions (e.g. Ubuntu 20.04).

Patch 1 is a bug fix for my previous series to avoid a bogus error log.

Patches 2-3 converts the routed network to native firewalld.

Patches 4-8 converts the NAT network to native firewalld. It also
introduces the "libvirt-nat" zone.

Eric Garver (8):
  util: virFirewallDGetPolicies: gracefully handle older firewalld
  network: firewalld: add networkAddHybridFirewallDRules()
  network: firewalld: use native routed networks
  util: add virFirewallDSourceSetZone()
  util: add virFirewallDApplyPolicyRichRules()
  network: firewalld: add zone for NAT networks
  network: firewalld: add policies for NAT networks
  network: firewalld: use native NAT networks

 libvirt.spec.in|   2 +
 src/libvirt_private.syms   |   2 +
 src/network/bridge_driver_linux.c  | 193 -
 src/network/libvirt-nat-out.policy |  13 ++
 src/network/libvirt-nat.zone   |  10 ++
 src/network/libvirt-to-host.policy |   1 +
 src/network/meson.build|  10 ++
 src/util/virfirewalld.c|  79 +++-
 src/util/virfirewalld.h|   6 +
 9 files changed, 258 insertions(+), 58 deletions(-)
 create mode 100644 src/network/libvirt-nat-out.policy
 create mode 100644 src/network/libvirt-nat.zone

-- 
2.37.3



[PATCH 1/8] util: virFirewallDGetPolicies: gracefully handle older firewalld

2022-11-10 Thread Eric Garver
If the running firewalld doesn't support getPolicies() then we fallback
to the "libvirt" zone. Throwing an error log is excessive since we
gracefully fallback.

Avoids these logs:

error : virGDBusCallMethod:242 : error from service: \
GDBus.Error:org.freedesktop.DBus.Error.UnknownMethod

Fixes: ab56f84976e0 ("util: add virFirewallDGetPolicies()")
Signed-off-by: Eric Garver 
---
 src/util/virfirewalld.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/util/virfirewalld.c b/src/util/virfirewalld.c
index ad879164c3a8..d11e974cc2d5 100644
--- a/src/util/virfirewalld.c
+++ b/src/util/virfirewalld.c
@@ -240,6 +240,7 @@ virFirewallDGetPolicies(char ***policies, size_t *npolicies)
 GDBusConnection *sysbus = virGDBusGetSystemBus();
 g_autoptr(GVariant) reply = NULL;
 g_autoptr(GVariant) array = NULL;
+g_autoptr(virError) error = NULL;
 
 *npolicies = 0;
 *policies = NULL;
@@ -247,10 +248,12 @@ virFirewallDGetPolicies(char ***policies, size_t 
*npolicies)
 if (!sysbus)
 return -1;
 
+error = g_new0(virError, 1);
+
 if (virGDBusCallMethod(sysbus,
,
G_VARIANT_TYPE("(as)"),
-   NULL,
+   error,
VIR_FIREWALL_FIREWALLD_SERVICE,
"/org/fedoraproject/FirewallD1",
"org.fedoraproject.FirewallD1.policy",
@@ -258,6 +261,12 @@ virFirewallDGetPolicies(char ***policies, size_t 
*npolicies)
NULL) < 0)
 return -1;
 
+if (error->level == VIR_ERR_ERROR) {
+if (!virGDBusErrorIsUnknownMethod(error))
+virReportErrorObject(error);
+return -1;
+}
+
 g_variant_get(reply, "(@as)", );
 *policies = g_variant_dup_strv(array, npolicies);
 
-- 
2.37.3



[PATCH 4/8] util: add virFirewallDSourceSetZone()

2022-11-10 Thread Eric Garver
Signed-off-by: Eric Garver 
---
 src/libvirt_private.syms |  1 +
 src/util/virfirewalld.c  | 24 
 src/util/virfirewalld.h  |  2 ++
 3 files changed, 27 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 97ff2a43e48a..c5882c535210 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2366,6 +2366,7 @@ virFirewallDGetZones;
 virFirewallDInterfaceSetZone;
 virFirewallDIsRegistered;
 virFirewallDPolicyExists;
+virFirewallDSourceSetZone;
 virFirewallDSynchronize;
 virFirewallDZoneExists;
 
diff --git a/src/util/virfirewalld.c b/src/util/virfirewalld.c
index d11e974cc2d5..07f9cdd1e485 100644
--- a/src/util/virfirewalld.c
+++ b/src/util/virfirewalld.c
@@ -451,6 +451,30 @@ virFirewallDInterfaceSetZone(const char *iface,
 }
 
 
+int
+virFirewallDSourceSetZone(const char *source,
+  const char *zone)
+{
+GDBusConnection *sysbus = virGDBusGetSystemBus();
+g_autoptr(GVariant) message = NULL;
+
+if (!sysbus)
+return -1;
+
+message = g_variant_new("(ss)", zone, source);
+
+return virGDBusCallMethod(sysbus,
+ NULL,
+ NULL,
+ NULL,
+ VIR_FIREWALL_FIREWALLD_SERVICE,
+ "/org/fedoraproject/FirewallD1",
+ "org.fedoraproject.FirewallD1.zone",
+ "changeZoneOfSource",
+ message);
+}
+
+
 void
 virFirewallDSynchronize(void)
 {
diff --git a/src/util/virfirewalld.h b/src/util/virfirewalld.h
index fa4c9e702ccb..11aad7786dfb 100644
--- a/src/util/virfirewalld.h
+++ b/src/util/virfirewalld.h
@@ -43,5 +43,7 @@ int virFirewallDApplyRule(virFirewallLayer layer,
 
 int virFirewallDInterfaceSetZone(const char *iface,
  const char *zone);
+int virFirewallDSourceSetZone(const char *source,
+  const char *zone);
 
 void virFirewallDSynchronize(void);
-- 
2.37.3



[PATCH 8/8] network: firewalld: use native NAT networks

2022-11-10 Thread Eric Garver
Use the new "libvirt-nat" zone for native NAT networks.

The "libvirt" zone is still in use, but only to handle DHCP packets.
Those won't be dispatched to the "libvirt-zone" because said zone is
using sources (instead of interfaces). DHCP packets don't have a valid
source address.

The use of "libvirt" zone is necessary due to a Linux < 5.5 limitation
in which nftables iifname cannot be matched in postrouting hook (i.e.
masquerade). In the future, when we can assume Linux 5.5+, we can
further improve this by attaching interfaces to the "libvirt-nat" zone
instead of using sources. Thus making the "libvirt" zone unnecessary.

Signed-off-by: Eric Garver 
---
 src/network/bridge_driver_linux.c | 55 +++
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/src/network/bridge_driver_linux.c 
b/src/network/bridge_driver_linux.c
index 42f098ff1f9b..d6c7d378f5f7 100644
--- a/src/network/bridge_driver_linux.c
+++ b/src/network/bridge_driver_linux.c
@@ -140,7 +140,10 @@ networkUseOnlyFirewallDRules(void)
 return false;
 
 if (virFirewallDPolicyExists("libvirt-routed-out") &&
-virFirewallDZoneExists("libvirt-routed")) {
+virFirewallDZoneExists("libvirt-routed") &&
+virFirewallDPolicyExists("libvirt-nat-out") &&
+virFirewallDZoneExists("libvirt-nat") &&
+virFirewallDZoneExists("libvirt")) {
 return true;
 }
 
@@ -825,6 +828,48 @@ networkAddOnlyFirewallDRules(virNetworkDef *def)
 if (def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
 if (virFirewallDInterfaceSetZone(def->bridge, "libvirt-routed") < 0)
 return -1;
+} else if (def->forward.type == VIR_NETWORK_FORWARD_NAT) {
+virNetworkIPDef *ipdef;
+size_t i;
+
+/* The initial DHCP packets won't be dispatched to the
+ * libvirt-nat zone because they don't yet have an IP address.
+ * The libvirt-nat zone needs to use sources instead of
+ * interfaces because kernels < 5.5 do not support matching
+ * iifname in postrouting.
+ *
+ * As a workaround, add the interface to the libvirt zone. This
+ * will allow dhcp to function. Afterwards packets will go to
+ * the libvirt-nat zone.
+ */
+if (virFirewallDInterfaceSetZone(def->bridge, "libvirt") < 0)
+return -1;
+
+for (i = 0;
+ (ipdef = virNetworkDefGetIPByIndex(def, AF_UNSPEC, i));
+ i++) {
+int prefix = virNetworkIPDefPrefix(ipdef);
+g_autofree char *networkstr = NULL;
+
+if (!(networkstr = virSocketAddrFormatWithPrefix(>address, 
prefix, true)))
+return -1;
+
+if (VIR_SOCKET_ADDR_IS_FAMILY(>address, AF_INET) ||
+def->forward.natIPv6 == VIR_TRISTATE_BOOL_YES) {
+if (virFirewallDSourceSetZone(networkstr, "libvirt-nat") < 0)
+return -1;
+if (def->forward.natIPv6 == VIR_TRISTATE_BOOL_YES) {
+const char *rich_rules[] = {"rule family=ipv6 masquerade"};
+size_t rich_rules_count = sizeof(rich_rules) / 
sizeof(rich_rules[0]);
+
+if (virFirewallDApplyPolicyRichRules("libvirt-nat-out", 
rich_rules, rich_rules_count) < 0)
+return -1;
+}
+} else if (VIR_SOCKET_ADDR_IS_FAMILY(>address, AF_INET6)) {
+if (virFirewallDSourceSetZone(networkstr, "libvirt-routed") < 
0)
+return -1;
+}
+}
 }
 
 return 0;
@@ -890,10 +935,8 @@ int networkAddFirewallRules(virNetworkDef *def)
 virNetworkIPDef *ipdef;
 g_autoptr(virFirewall) fw = virFirewallNew();
 
-if (!def->bridgeZone && networkUseOnlyFirewallDRules() &&
-def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
+if (!def->bridgeZone && networkUseOnlyFirewallDRules())
 return networkAddOnlyFirewallDRules(def);
-}
 
 if (virOnce(, networkSetupPrivateChains) < 0)
 return -1;
@@ -968,10 +1011,8 @@ void networkRemoveFirewallRules(virNetworkDef *def)
 virNetworkIPDef *ipdef;
 g_autoptr(virFirewall) fw = virFirewallNew();
 
-if (!def->bridgeZone && networkUseOnlyFirewallDRules() &&
-def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
+if (!def->bridgeZone && networkUseOnlyFirewallDRules())
 return;
-}
 
 virFirewallStartTransaction(fw, VIR_FIREWALL_TRANSACTION_IGNORE_ERRORS);
 networkRemoveChecksumFirewallRules(fw, def);
-- 
2.37.3



Re: libvirt-guests configurability regression

2022-11-10 Thread Laszlo Ersek
On 11/10/22 11:32, Andrea Bolognani wrote:
> On Thu, Nov 10, 2022 at 08:57:25AM +, Daniel P. Berrangé wrote:
>> On Wed, Nov 09, 2022 at 09:17:08PM +0100, Olaf Hering wrote:
>>> Wed, 9 Nov 2022 09:04:12 -0800 Andrea Bolognani :
 Olaf, can you please remind me why the files we dropped were
 problematic but these ones apparently aren't?
>>>
>>> These are equally problematic because they are owned by the admin as well.
>>>
>>> They are essentially empty, their content is a duplication from doc/ (I 
>>> hope).
>>> They should not be part of libvirt.git.
>>
>> We are not going to remove these files from git or from the RPM, it would
>> be incredibly user hostile. I didn't mind removing the sysconfig files
>> because they were legacy configs from initscripts era which are obsolete
>> with systemd and so should not really be used except for existing
>> deployments.

Hmm. That's a good distinction actually. So I guess my original
complaint should be rephrased as one against systemd style service
configuration -- then again, that ship has sailed ;)

Thanks for the enlightenment!

> Should we reconsider the fate of libvirt-guests's sysconfig file
> then?
> 
> The other sysconfig files basically only influenced the way each
> service is spawned (VIRT*_ARGS, mostly used for --timeout) but in the
> case of libvirt-guests the sysconfig file is used to configure the
> behavior in ways that don't really fall under the purview of systemd.

I'm not sure if I know enough to claim this myself -- the libvirt-guests
service's configuration may actually fall in systemd territory. I just
find the override files

  /etc/systemd/system/NAME.service.d/override.conf

from "systemctl edit" much less intuitive than the previous way.

BTW, now that I've retried "systemctl edit", it seems that the override
file is "primed" from the central service file
(/usr/lib/systemd/system/NAME.service) -- the latter is included,
commented out, when $EDITOR starts up. So if all the documentation and
the default knob assignments were included in
"/usr/lib/systemd/system/libvirt-guests.service", then "systemctl edit"
would indeed be a mostly complete replacement for the sysconfig file.

In fact, commit 8eb4461645c5 does indicate *just this* method -- but as
I pointed out up-thread, the central "libvirt-guests.service" file does
not include the actual knobs and their docs; those are (at best) in
"tools/libvirt-guests.sh.in".

Instead, the central service file currently contains (excerpt):

> [Service]
> EnvironmentFile=-/etc/sysconfig/libvirt-guests
> # Hack just call traditional service until we factor
> # out the code
> ExecStart=/usr/libexec/libvirt-guests.sh start
> ExecStop=/usr/libexec/libvirt-guests.sh stop

I guess *that* is the actual problem -- it's technical debt. This
"factoring out" (= the migration of the knobs and the docs to the
service file) should have *preceded* commit 8eb4461645c5. Commit
8eb4461645c5 alluded to the proper (modern) way for configuring
services, and with that argument removed the sysconfig file for
"libvirt-guests" -- but the libvirt-guests service had not converted
sufficiently for that.

Therefore I do claim that commit 8eb4461645c5 is buggy -- it was too early.

Olaf said up-thread, "the script needs to be adjusted to recognize
existing environment variables", and technically, that may indeed be the
solution; I'm just saying it was actualy a *pre-requisite* (missed or
ignored) for commit 8eb4461645c5.

> Maybe a proper /etc/libvirt/libvirt-guests.conf would make sense, for
> consistency with other services? But then, you don't want to be
> parsing libvirt's config format from a shell script. Could we rewrite
> libvirt-guests in C, possibly improving on the hairier parts of it in
> the process? Perhaps even take the opportunity to reconsider the way
> some of its settings interact with each other.
> 
> I might be getting a bit ahead of myself here :)
> 

I think the logic being implemented as a shell script is fine, but its
configuration (env vars), and the related documentation, needs to be
moved / added to the central service file, so that "systemctl edit" can
present them as a crutch to the user, formatted in comments.

The manual page is *not* a substitute for this.

Laszlo



Re: [PATCH v3] nodedev: ignore EINVAL from libudev in udevEventHandleThread

2022-11-10 Thread Michal Prívozník
On 11/10/22 10:36, christian.ehrha...@canonical.com wrote:
> From: Christian Ehrhardt 
> 
> Certain udev entries might be of a size that makes libudev emit EINVAL
> which right now leads to udevEventHandleThread exiting. Due to no more
> handling events other elements of libvirt will start pushing for events
> to be consumed which never happens causing a busy loop burning a cpu
> without any gain.
> 
> After evaluation of the example case discussed in in #245 and a test
> run ignoring EINVAL it was considered safe to add EINVAL to the ignored
> errnos to not exit udevEventHandleThread giving it more resilience.
> 
> The root cause is in systemd and by now was discussed and fixed via
> https://github.com/systemd/systemd/issues/24987, but hardening libvirt
> to be able to better deal with EINVAL returned still is the right thing
> to avoid the reported busy loops on systemd with older systemd versions.
> 
> Fixes: https://gitlab.com/libvirt/libvirt/-/issues/245
> 
> Signed-off-by: Christian Ehrhardt 
> Reviewed-by: Daniel P. Berrangé 
> ---
>  src/node_device/node_device_udev.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)

Pushed now, sorry for the delay.

Michal



Re: [PATCH] qemu: capabilities: Detect support for JSON args for -netdev

2022-11-10 Thread Michal Prívozník
On 11/9/22 18:18, Peter Krempa wrote:
> JSON args for -netdev were added as precursor for adding the 'dgram'
> network backend type. Enable the detection and update test cases using
> DO_TEST_CAPS_LATEST.
> 
> Enabling the capability also ensures that the -netdev argument is
> validated against the QAPI schema of 'netdev_add' which was already
> implemented but not enabled.
> 
> The parser supporting JSON was added by qemu commit f3eedcddba3 and
> enabled when adding stream/dgram netdevs in commit 5166fe0ae46.
> 
> Signed-off-by: Peter Krempa 
> ---
>  src/qemu/qemu_capabilities.c| 2 ++
>  tests/qemucapabilitiesdata/caps_7.2.0.x86_64.xml| 1 +
>  tests/qemuxml2argvdata/boot-complex.x86_64-latest.args  | 4 ++--
>  tests/qemuxml2argvdata/boot-order.x86_64-latest.args| 2 +-
>  .../channel-unix-guestfwd.x86_64-latest.args| 4 ++--
>  .../qemuxml2argvdata/devices-acpi-index.x86_64-latest.args  | 6 +++---
>  tests/qemuxml2argvdata/disk-ioeventfd.x86_64-latest.args| 2 +-
>  tests/qemuxml2argvdata/event_idx.x86_64-latest.args | 2 +-
>  .../graphics-spice-timeout.x86_64-latest.args   | 2 +-
>  tests/qemuxml2argvdata/name-escape.x86_64-latest.args   | 2 +-
>  tests/qemuxml2argvdata/net-user.x86_64-latest.args  | 2 +-
>  .../qemuxml2argvdata/net-vdpa-multiqueue.x86_64-latest.args | 2 +-
>  tests/qemuxml2argvdata/net-vdpa.x86_64-latest.args  | 2 +-
>  tests/qemuxml2argvdata/net-vhostuser.x86_64-latest.args | 6 +++---
>  tests/qemuxml2argvdata/net-virtio-rss.x86_64-latest.args| 6 +++---
>  tests/qemuxml2argvdata/q35-pcie-autoadd.x86_64-latest.args  | 4 ++--
>  tests/qemuxml2argvdata/q35-pcie.x86_64-latest.args  | 4 ++--
>  .../q35-virt-manager-basic.x86_64-latest.args   | 2 +-
>  tests/qemuxml2argvdata/user-aliases.x86_64-latest.args  | 6 +++---
>  tests/qemuxml2argvdata/virtio-lun.x86_64-latest.args| 2 +-
>  .../virtio-non-transitional.x86_64-latest.args  | 2 +-
>  .../virtio-options-net-ats.x86_64-latest.args   | 4 ++--
>  .../virtio-options-net-iommu.x86_64-latest.args | 4 ++--
>  .../virtio-options-net-packed.x86_64-latest.args| 4 ++--
>  tests/qemuxml2argvdata/virtio-options.x86_64-latest.args| 2 +-
>  .../qemuxml2argvdata/virtio-transitional.x86_64-latest.args | 2 +-
>  .../qemuxml2argvdata/x86_64-pc-graphics.x86_64-latest.args  | 2 +-
>  .../qemuxml2argvdata/x86_64-pc-headless.x86_64-latest.args  | 2 +-
>  .../qemuxml2argvdata/x86_64-q35-graphics.x86_64-latest.args | 2 +-
>  .../qemuxml2argvdata/x86_64-q35-headless.x86_64-latest.args | 2 +-
>  30 files changed, 46 insertions(+), 43 deletions(-)

Reviewed-by: Michal Privoznik 

Michal



Re: libvirt-guests configurability regression

2022-11-10 Thread Andrea Bolognani
On Thu, Nov 10, 2022 at 08:57:25AM +, Daniel P. Berrangé wrote:
> On Wed, Nov 09, 2022 at 09:17:08PM +0100, Olaf Hering wrote:
> > Wed, 9 Nov 2022 09:04:12 -0800 Andrea Bolognani :
> > > Olaf, can you please remind me why the files we dropped were
> > > problematic but these ones apparently aren't?
> >
> > These are equally problematic because they are owned by the admin as well.
> >
> > They are essentially empty, their content is a duplication from doc/ (I 
> > hope).
> > They should not be part of libvirt.git.
>
> We are not going to remove these files from git or from the RPM, it would
> be incredibly user hostile. I didn't mind removing the sysconfig files
> because they were legacy configs from initscripts era which are obsolete
> with systemd and so should not really be used except for existing
> deployments.

Should we reconsider the fate of libvirt-guests's sysconfig file
then?

The other sysconfig files basically only influenced the way each
service is spawned (VIRT*_ARGS, mostly used for --timeout) but in the
case of libvirt-guests the sysconfig file is used to configure the
behavior in ways that don't really fall under the purview of systemd.

Maybe a proper /etc/libvirt/libvirt-guests.conf would make sense, for
consistency with other services? But then, you don't want to be
parsing libvirt's config format from a shell script. Could we rewrite
libvirt-guests in C, possibly improving on the hairier parts of it in
the process? Perhaps even take the opportunity to reconsider the way
some of its settings interact with each other.

I might be getting a bit ahead of myself here :)

-- 
Andrea Bolognani / Red Hat / Virtualization



[PATCH v3] nodedev: ignore EINVAL from libudev in udevEventHandleThread

2022-11-10 Thread christian . ehrhardt
From: Christian Ehrhardt 

Certain udev entries might be of a size that makes libudev emit EINVAL
which right now leads to udevEventHandleThread exiting. Due to no more
handling events other elements of libvirt will start pushing for events
to be consumed which never happens causing a busy loop burning a cpu
without any gain.

After evaluation of the example case discussed in in #245 and a test
run ignoring EINVAL it was considered safe to add EINVAL to the ignored
errnos to not exit udevEventHandleThread giving it more resilience.

The root cause is in systemd and by now was discussed and fixed via
https://github.com/systemd/systemd/issues/24987, but hardening libvirt
to be able to better deal with EINVAL returned still is the right thing
to avoid the reported busy loops on systemd with older systemd versions.

Fixes: https://gitlab.com/libvirt/libvirt/-/issues/245

Signed-off-by: Christian Ehrhardt 
Reviewed-by: Daniel P. Berrangé 
---
 src/node_device/node_device_udev.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/node_device/node_device_udev.c 
b/src/node_device/node_device_udev.c
index 24ef1c25a9..2454cab8f8 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -1865,10 +1865,12 @@ udevEventHandleThread(void *opaque G_GNUC_UNUSED)
 }
 
 /* POSIX allows both EAGAIN and EWOULDBLOCK to be used
- * interchangeably when the read would block or timeout was fired
+ * interchangeably when the read would block or timeout was fired.
+ * EINVAL might happen on too large udev entries, ignore those for
+ * the robustness of udevEventHandleThread.
  */
 VIR_WARNINGS_NO_WLOGICALOP_EQUAL_EXPR
-if (errno != EAGAIN && errno != EWOULDBLOCK) {
+if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINVAL) {
 VIR_WARNINGS_RESET
 virReportSystemError(errno, "%s",
  _("failed to receive device from udev "
-- 
2.38.1



Re: libvirt-guests configurability regression

2022-11-10 Thread Daniel P . Berrangé
On Wed, Nov 09, 2022 at 09:17:08PM +0100, Olaf Hering wrote:
> Wed, 9 Nov 2022 09:04:12 -0800 Andrea Bolognani :
> 
> > Olaf, can you please remind me why the files we dropped were
> > problematic but these ones apparently aren't?
> 
> These are equally problematic because they are owned by the admin as well.
> 
> They are essentially empty, their content is a duplication from doc/ (I hope).
> They should not be part of libvirt.git.

We are not going to remove these files from git or from the RPM, it would
be incredibly user hostile. I didn't mind removing the sysconfig files
because they were legacy configs from initscripts era which are obsolete
with systemd and so should not really be used except for existing
deployments.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [PATCH] qemu: capabilities: Detect support for JSON args for -netdev

2022-11-10 Thread Peter Krempa
On Wed, Nov 09, 2022 at 18:18:16 +0100, Peter Krempa wrote:
> JSON args for -netdev were added as precursor for adding the 'dgram'
> network backend type. Enable the detection and update test cases using
> DO_TEST_CAPS_LATEST.
> 
> Enabling the capability also ensures that the -netdev argument is
> validated against the QAPI schema of 'netdev_add' which was already
> implemented but not enabled.
> 
> The parser supporting JSON was added by qemu commit f3eedcddba3 and

https://gitlab.com/qemu-project/qemu/-/commit/f3eedcddba3#28a48d37e50f9c5a429dcaef8c866d9e6b4ad469_1586_1636

You can see that it uses the visitor for 'Netdev' which is the type also
used as argument for 'netdev_add'. Our internals generate a JSON object
for both cases and just convert it to commandline if JSON is not used.

> enabled when adding stream/dgram netdevs in commit 5166fe0ae46.

https://gitlab.com/qemu-project/qemu/-/commit/5166fe0ae46#28a48d37e50f9c5a429dcaef8c866d9e6b4ad469_1628_1642

Here it explicitly detects opening { as modern syntax.