[ https://issues.apache.org/jira/browse/CLOUDSTACK-359?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13484223#comment-13484223 ]
Hugo Trippaers commented on CLOUDSTACK-359: ------------------------------------------- Proposed fix, now this needs some serious testing. Will report back later. diff --git a/server/src/com/cloud/cluster/ClusterManagerImpl.java b/server/src/com/cloud/cluster/ClusterManagerImpl.java index 4dbb16c..f6fdef0 100755 --- a/server/src/com/cloud/cluster/ClusterManagerImpl.java +++ b/server/src/com/cloud/cluster/ClusterManagerImpl.java @@ -407,6 +407,23 @@ public class ClusterManagerImpl implements ClusterManager { Answer[] answers = new Answer[1]; answers[0] = new Answer(cmd, result, null); return _gson.toJson(answers); + } else if (cmds.length == 1 && cmds[0] instanceof PropagateResourceEventCommand ) { + PropagateResourceEventCommand cmd = (PropagateResourceEventCommand) cmds[0]; + + s_logger.debug("Intercepting command to propagate event " + cmd.getEvent().name() + " for host " + cmd.getHostId()); + + boolean result = false; + try { + result = executeResourceUserRequest(cmd.getHostId(), cmd.getEvent()); + s_logger.debug("Result is " + result); + } catch (AgentUnavailableException ex) { + s_logger.warn("Agent is unavailable", ex); + return null; + } + + Answer[] answers = new Answer[1]; + answers[0] = new Answer(cmd, result, null); + return _gson.toJson(answers); } > PropagateResourceEventCommand fails in cluster configuration > ------------------------------------------------------------- > > Key: CLOUDSTACK-359 > URL: https://issues.apache.org/jira/browse/CLOUDSTACK-359 > Project: CloudStack > Issue Type: Bug > Components: Management Server > Affects Versions: 4.0.0 > Reporter: Hugo Trippaers > Priority: Critical > Fix For: 4.0.0 > > > When enabling maintenance mode on a hypervisor the command fails. This seems > to only happen in the case where the command is received by the api on server > A and the agent for the hypervisor is running on server B. > The setup this was encountered on is a two node cluster running an early pre > release of the 4.0 branch. 
> 2012-10-16 10:01:43,589 DEBUG [cloud.async.AsyncJobManagerImpl] > (TP-Processor22:null) submit async job-18377, details: AsyncJobVO {id:18377, > userId: 2, accoun > tId: 2, sessionKey: null, instanceType: Host, instanceId: 133, cmd: > com.cloud.api.commands.PrepareForMaintenanceCmd, cmdOriginator: null, > cmdInfo: {"response" > :"json","id":"931cc0bc-a423-4600-8ccd-0597eeffaa85","sessionkey":"R4fLb60jJNSdAIe8zt4wRcfCE+E\u003d","ctxUserId":"2","_":"1350374503534","ctxAccountId":"2","c > txStartEventId":"144113"}, cmdVersion: 0, callbackType: 0, callbackAddress: > null, status: 0, processStatus: 0, resultCode: 0, result: null, initMsid: > 34505243 > 3506, completeMsid: null, lastUpdated: null, lastPolled: null, created: null} > 2012-10-16 10:01:43,589 DEBUG [cloud.async.AsyncJobManagerImpl] > (Job-Executor-68:job-18377) Executing > com.cloud.api.commands.PrepareForMaintenanceCmd for job- > 18377 > 2012-10-16 10:01:43,617 DEBUG [cloud.cluster.ClusterManagerImpl] > (Job-Executor-68:job-18377) Propagating agent change request > event:AdminAskMaintenace to agen > t:133 > 2012-10-16 10:01:43,617 DEBUG [cloud.cluster.ClusterManagerImpl] > (Job-Executor-68:job-18377) 345052433506 -> 345052433504.133 > [{"PropagateResourceEventCommand > ":{"hostId":133,"event":"AdminAskMaintenace","contextMap":{},"wait":0}}] > 2012-10-16 10:01:43,618 DEBUG [cloud.cluster.ClusterManagerImpl] > (Cluster-Worker-5:null) Cluster PDU 345052433506 -> 345052433504. agent: 133, > pdu seq: 75, pd > u ack seq: 0, json: > [{"PropagateResourceEventCommand":{"hostId":133,"event":"AdminAskMaintenace","contextMap":{},"wait":0}}] > 2012-10-16 10:01:43,625 DEBUG [cloud.cluster.ClusterServiceServletImpl] > (Cluster-Worker-5:null) POST http://10.200.22.16:9090/clusterservice response > :true, r > esponding time: 6 ms > 2012-10-16 10:01:43,626 DEBUG [cloud.cluster.ClusterManagerImpl] > (Cluster-Worker-5:null) Cluster PDU 345052433506 -> 345052433504 completed. > time: 7ms. 
agent: > 133, pdu seq: 75, pdu ack seq: 0, json: > [{"PropagateResourceEventCommand":{"hostId":133,"event":"AdminAskMaintenace","contextMap":{},"wait":0}}] > 2012-10-16 10:01:43,635 DEBUG [cloud.cluster.ClusterManagerImpl] > (Job-Executor-68:job-18377) 345052433506 -> 345052433504.133 completed. > result: [{"Unsupporte > dAnswer":{"result":false,"details":"Unsupported command > issued:com.cloud.agent.api.PropagateResourceEventCommand. Are you sure you > got the right type of serv > er?","contextMap":{},"wait":0}}] > 2012-10-16 10:01:43,636 DEBUG [cloud.cluster.ClusterManagerImpl] > (Job-Executor-68:job-18377) Result for agent change is false > 2012-10-16 10:01:43,636 ERROR [cloud.api.ApiDispatcher] > (Job-Executor-68:job-18377) Exception while executing > PrepareForMaintenanceCmd: > com.cloud.utils.exception.CloudRuntimeException: Unable to prepare for > maintenance host 133 > at > com.cloud.resource.ResourceManagerImpl.maintain(ResourceManagerImpl.java:1176) > at > com.cloud.api.commands.PrepareForMaintenanceCmd.execute(PrepareForMaintenanceCmd.java:102) > at com.cloud.api.ApiDispatcher.dispatch(ApiDispatcher.java:138) > at > com.cloud.async.AsyncJobManagerImpl$1.run(AsyncJobManagerImpl.java:449) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441) > at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) > at java.util.concurrent.FutureTask.run(FutureTask.java:138) > at > java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) > at java.lang.Thread.run(Thread.java:662) > 2012-10-16 10:01:43,637 DEBUG [cloud.async.AsyncJobManagerImpl] > (Job-Executor-68:job-18377) Complete async job-18377, jobStatus: 2, > resultCode: 530, result: c > om.cloud.api.response.ExceptionResponse@6e13b651 -- This message is automatically generated by JIRA. 
If you think it was sent incorrectly, please contact your JIRA administrators. For more information on JIRA, see: http://www.atlassian.com/software/jira