On Tue, Aug 28, 2018 at 04:40:16PM +0800, Yi Wang wrote:
When doing some job holding state lock for a long time, we may come across error: "Timed out during operation: cannot acquire state change lock" Well, sometimes it's not a problem and users wanner continue
s/wanner/want to/
to wait, and this patch allow users decide how long time they can wait the state lock. Signed-off-by: Yi Wang <[email protected]> Reviewed-by: Xi Xu <[email protected]> --- changes in v4: - fox syntax-check error changes in v3: - add user-friendly description and nb of state lock - check validity of stateLockTimeout changes in v2: - change default value to 30 in qemu.conf - set the default value in virQEMUDriverConfigNew() v4 Signed-off-by: Yi Wang <[email protected]> --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 10 ++++++++++ src/qemu/qemu_conf.c | 14 ++++++++++++++ src/qemu/qemu_conf.h | 2 ++ src/qemu/qemu_domain.c | 5 +---- src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ddc4bbf..f7287ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -93,6 +93,7 @@ module Libvirtd_qemu = | limits_entry "max_core" | bool_entry "dump_guest_core" | str_entry "stdio_handler" + | int_entry "state_lock_timeout"
here you add the option at the end of the 'process_entry' group
let device_entry = bool_entry "mac_filter"
| bool_entry "relaxed_acs_check"
diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf
index cd57b3c..8920a1a 100644
--- a/src/qemu/qemu.conf
+++ b/src/qemu/qemu.conf
@@ -667,6 +667,16 @@
#
#max_queued = 0
+
+# When two or more threads want to work with the same domain they use a
+# job lock to mutually exclude each other. However, waiting for the lock
+# is limited up to state_lock_timeout seconds.
+# NB, strong recommendation to set the timeout longer than 30 seconds.
+#
+# Default is 30
+#
+#state_lock_timeout = 60
But here in qemu.conf, you add it between the rpc entries. It seems we did not follow the structure with 'stdio_handler', but adding it either right after 'dump_guest_core' or at the end of file would be better than squeezing it between rpc entries.
+ ################################################################### # Keepalive protocol: # This allows qemu driver to detect broken connections to remote diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index a4f545e..c761cae 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -129,6 +129,9 @@ void qemuDomainCmdlineDefFree(qemuDomainCmdlineDefPtr def) #endif +/* Give up waiting for mutex after 30 seconds */ +#define QEMU_JOB_WAIT_TIME (1000ull * 30) +
Here the constant is multiplied by 1000 to convert from seconds to milliseconds.
virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged)
{
virQEMUDriverConfigPtr cfg;
@@ -346,6 +349,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool
privileged)
cfg->glusterDebugLevel = 4;
cfg->stdioLogD = true;
+ cfg->stateLockTimeout = QEMU_JOB_WAIT_TIME;
+
if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST)))
goto error;
@@ -863,6 +868,9 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg,
if (virConfGetValueUInt(conf, "keepalive_count", &cfg->keepAliveCount) < 0)
goto cleanup;
+ if (virConfGetValueInt(conf, "state_lock_timeout", &cfg->stateLockTimeout)
< 0)
+ goto cleanup;
+
But the parsed value is passed directly here, so '60' in the config file would result in 60 ms.
if (virConfGetValueInt(conf, "seccomp_sandbox", &cfg->seccompSandbox) < 0)
goto cleanup;
@@ -1055,6 +1063,12 @@ virQEMUDriverConfigValidate(virQEMUDriverConfigPtr cfg)
return -1;
}
+ if (cfg->stateLockTimeout <= 0) {
+ virReportError(VIR_ERR_CONF_SYNTAX, "%s",
+ _("state_lock_timeout should larger than zero"));
+ return -1;
+ }
+
return 0;
}
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index a8d84ef..97cf2e1 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -190,6 +190,8 @@ struct _virQEMUDriverConfig {
int keepAliveInterval;
unsigned int keepAliveCount;
+ int stateLockTimeout;
+
int seccompSandbox;
char *migrateHost;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 886e3fb..5a2ca52 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -6652,9 +6652,6 @@ qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
priv->job.agentActive == QEMU_AGENT_JOB_NONE));
}
-/* Give up waiting for mutex after 30 seconds */
-#define QEMU_JOB_WAIT_TIME (1000ull * 30)
-
/**
* qemuDomainObjBeginJobInternal:
* @driver: qemu driver
@@ -6714,7 +6711,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
}
priv->jobs_queued++;
- then = now + QEMU_JOB_WAIT_TIME;
+ then = now + cfg->stateLockTimeout;
retry:
if ((!async && job != QEMU_JOB_DESTROY) &&
diff --git a/src/qemu/test_libvirtd_qemu.aug.in
b/src/qemu/test_libvirtd_qemu.aug.in
index f1e8806..dc5de96 100644
--- a/src/qemu/test_libvirtd_qemu.aug.in
+++ b/src/qemu/test_libvirtd_qemu.aug.in
@@ -105,3 +105,4 @@ module Test_libvirtd_qemu =
{ "pr_helper" = "/usr/bin/qemu-pr-helper" }
{ "swtpm_user" = "tss" }
{ "swtpm_group" = "tss" }
+{ "state_lock_timeout" = "60" }
This needs to be ordered properly - install the 'augeas' tool to see where 'make check' fails. Jano
signature.asc
Description: Digital signature
-- libvir-list mailing list [email protected] https://www.redhat.com/mailman/listinfo/libvir-list
