This is an automated email from the ASF dual-hosted git repository.
bhaisaab pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/cloudstack.git
The following commit(s) were added to refs/heads/master by this push:
new 3ee8d83 CLOUDSTACK-10136: Fix RemoteHostEndPoint thread growth
3ee8d83 is described below
commit 3ee8d83621c23f976413fdce6d9245197497d504
Author: Rohit Yadav <[email protected]>
AuthorDate: Wed Nov 8 16:50:46 2017 +0530
CLOUDSTACK-10136: Fix RemoteHostEndPoint thread growth
This fixes the following:
- Unchecked thread growth in RemoteEndHostEndPoint
- Potential NPE while finding EP for a storage/scope
Unbounded thread growth can be reproduced with following findings:
- Every unreachable template would produce 6 new threads (in a single
ScheduledExecutorService instance) spaced by 10 seconds
- Every reachable template url without the template would produce 1 new
thread (and one ScheduledExecutorService instance), it errors out quickly
without
causing more thread growth.
- Every valid url will produce upto 10 threads as the same ep (endpoint
instance) will be reused to query upload/download (async callback)
progresses.
Every RemoteHostEndPoint instances creates its own
ScheduledExecutorService instance which is why in the jstack dump, we
see several threads that share the prefix RemoteHostEndPoint-{1..10}
(given poolsize is defined as 10, it uses suffixes 1-10).
This fixes the discovered thread leakage with following notes:
- Instead of ScheduledExecutorService instance, a cached pool could be
used instead and was implemented, and with `static` scope to be reused
among other future RemoteHostEndPoint instances.
- It was not clear why we would want to wait when we've Answers returned
from the remote EP, and therefore a scheduled/delayed Runnable was
not required at all for processing answers. ScheduledExecutorService
was therefore not really required, moved to ExecutorService instead.
- Another benefit of using a cached pool is that it will shutdown
threads if they are not used in 60 seconds, and they get re-used for
future runnable submissions.
- Caveat: the executor service is still unbounded, however, the use-case
that this method is used for short jobs to check upload/download
progresses fits the case here.
- Refactored CmdRunner to not use/reference objects from parent class.
Signed-off-by: Rohit Yadav <[email protected]>
---
.../cloudstack/storage/RemoteHostEndPoint.java | 24 +++++++++++-----------
.../storage/endpoint/DefaultEndPointSelector.java | 21 +++++++++++--------
2 files changed, 24 insertions(+), 21 deletions(-)
diff --git
a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
index 3bad62e..b438bc1 100644
--- a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
+++ b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java
@@ -18,17 +18,15 @@
*/
package org.apache.cloudstack.storage;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
-import org.apache.log4j.Logger;
-
import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint;
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
+import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@@ -54,9 +52,11 @@ import com.cloud.vm.dao.SecondaryStorageVmDao;
public class RemoteHostEndPoint implements EndPoint {
private static final Logger s_logger =
Logger.getLogger(RemoteHostEndPoint.class);
+
private long hostId;
private String hostAddress;
private String publicAddress;
+
@Inject
AgentManager agentMgr;
@Inject
@@ -65,10 +65,10 @@ public class RemoteHostEndPoint implements EndPoint {
protected SecondaryStorageVmDao vmDao;
@Inject
protected HostDao _hostDao;
- private ScheduledExecutorService executor;
+
+ private static ExecutorService executorService =
Executors.newCachedThreadPool(new NamedThreadFactory("RemoteHostEndPoint"));
public RemoteHostEndPoint() {
- executor = Executors.newScheduledThreadPool(10, new
NamedThreadFactory("RemoteHostEndPoint"));
}
private void configure(Host host) {
@@ -134,17 +134,17 @@ public class RemoteHostEndPoint implements EndPoint {
}
private class CmdRunner extends ManagedContextRunnable implements Listener
{
- final AsyncCompletionCallback<Answer> callback;
- Answer answer;
+ private final AsyncCompletionCallback<Answer> callback;
+ private Answer answer;
- public CmdRunner(AsyncCompletionCallback<Answer> callback) {
+ CmdRunner(final AsyncCompletionCallback<Answer> callback) {
this.callback = callback;
}
@Override
public boolean processAnswers(long agentId, long seq, Answer[]
answers) {
- answer = answers[0];
- executor.schedule(this, 10, TimeUnit.SECONDS);
+ this.answer = answers[0];
+ RemoteHostEndPoint.executorService.submit(this);
return true;
}
@@ -204,7 +204,7 @@ public class RemoteHostEndPoint implements EndPoint {
@Override
protected void runInContext() {
- callback.complete(answer);
+ this.callback.complete(this.answer);
}
}
diff --git
a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
index e414b6c..c25c1ad 100644
---
a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
+++
b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java
@@ -106,16 +106,19 @@ public class DefaultEndPointSelector implements
EndPointSelector {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append(sqlBase);
- if (scope.getScopeType() == ScopeType.HOST) {
- sbuilder.append(" and h.id = ");
- sbuilder.append(scope.getScopeId());
- } else if (scope.getScopeType() == ScopeType.CLUSTER) {
- sbuilder.append(" and h.cluster_id = ");
- sbuilder.append(scope.getScopeId());
- } else if (scope.getScopeType() == ScopeType.ZONE) {
- sbuilder.append(" and h.data_center_id = ");
- sbuilder.append(scope.getScopeId());
+ if (scope != null) {
+ if (scope.getScopeType() == ScopeType.HOST) {
+ sbuilder.append(" and h.id = ");
+ sbuilder.append(scope.getScopeId());
+ } else if (scope.getScopeType() == ScopeType.CLUSTER) {
+ sbuilder.append(" and h.cluster_id = ");
+ sbuilder.append(scope.getScopeId());
+ } else if (scope.getScopeType() == ScopeType.ZONE) {
+ sbuilder.append(" and h.data_center_id = ");
+ sbuilder.append(scope.getScopeId());
+ }
}
+
// TODO: order by rand() is slow if there are lot of hosts
sbuilder.append(") t where t.value<>'true' or t.value is null");
//Added for exclude cluster's subquery
sbuilder.append(" ORDER by rand() limit 1");
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].