DaanHoogland commented on a change in pull request #3575: [WIP DO NOT MERGE]
Health check feature for virtual router
URL: https://github.com/apache/cloudstack/pull/3575#discussion_r358202932
##########
File path:
server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
##########
@@ -1186,6 +1219,290 @@ protected void pushToUpdateQueue(final List<NetworkVO>
networks) throws Interrup
}
}
+ protected class AnalyseRouterMonitorResultsTask extends
ManagedContextRunnable {
+ public AnalyseRouterMonitorResultsTask() {
+ }
+
+ @Override
+ protected void runInContext() {
+ try {
+ final List<DomainRouterVO> routers =
_routerDao.listByStateAndManagementServer(VirtualMachine.State.Running,
mgmtSrvrId);
+ s_logger.debug("Found " + routers.size() + " running routers.
");
+
+ for (final DomainRouterVO router : routers) {
+ GetRouterMonitorResultsAnswer answer =
getMonitorResults(router, false);
+ String checkFailsToRestartVr =
RouterHealthChecksFailuresToRestartVr.valueIn(router.getDataCenterId());
+ if (answer != null && answer.getFailingChecks().size() > 0
&& StringUtils.isNotBlank(checkFailsToRestartVr)) {
+ for (String failedCheck : answer.getFailingChecks()) {
+ if (checkFailsToRestartVr.contains(failedCheck)) {
+ rebootRouter(router.getId(), true);
+ }
+ }
+ }
+ }
+ } catch (final Exception ex) {
+ s_logger.error("Fail to complete the
AnalyseRouterMonitorResultsTask! ", ex);
+ }
+ }
+ }
+
+ // Returns null if health checks are not enabled
+ private GetRouterMonitorResultsAnswer getMonitorResults(DomainRouterVO
router, boolean performFreshChecks) {
+ if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+ return null;
+ }
+
+ String controlIP = getRouterControlIP(router);
+ if (StringUtils.isNotBlank(controlIP) && !controlIP.equals("0.0.0.0"))
{
+ final GetRouterMonitorResultsCommand command = new
GetRouterMonitorResultsCommand(performFreshChecks);
+ command.setAccessDetail(NetworkElementCommand.ROUTER_IP,
controlIP);
+ command.setAccessDetail(NetworkElementCommand.ROUTER_NAME,
router.getInstanceName());
+ try {
+ final Answer answer = _agentMgr.easySend(router.getHostId(),
command);
+
+ if (answer == null) {
+ s_logger.warn("Unable to fetch monitoring results data
from router " + router.getHostName());
+ return null;
+ }
+ if (answer instanceof GetRouterMonitorResultsAnswer) {
+ return (GetRouterMonitorResultsAnswer) answer;
+ } else {
+ s_logger.warn("Unable to fetch health checks results to
router " + router.getHostName() + " Received answer " + answer.getDetails());
+ return new GetRouterMonitorResultsAnswer(command, false,
null, answer.getDetails());
+ }
+ } catch (final Exception e) {
+ s_logger.warn("Error while collecting alerts from router: " +
router.getInstanceName(), e);
+ return null;
+ }
+ }
+
+ return null;
+ }
+
+ @Override
+ public Map<String, String> getRouterHealthCheckResults(long routerId,
boolean runChecks) {
+ DomainRouterVO router = _routerDao.findById(routerId);
+ Map<String, String> result = new HashMap<>();
+
+ if (router == null) {
+ result.put("success", "False");
+ result.put("message", "Router not found");
+ return result;
+ }
+
+ if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+ result.put("success", "False");
+ result.put("message", "Router id not valid. Health checks are
disabled in router's zone.");
+ return result;
+ }
+
+ s_logger.info("Getting router health check results for router " +
router.getUuid());
+
+ if (runChecks) {
+ boolean successfullyUpdatedData =
updateRouterHealthCheckData(router);
+ s_logger.info("Updating health check data for fresh run
successfully: " + successfullyUpdatedData);
+ }
+
+ s_logger.info("Retrieving results for fresh health check execution for
router " + router.getUuid());
+ GetRouterMonitorResultsAnswer answer = getMonitorResults(router,
runChecks);
+ if (answer == null) {
+ result.put("success", "False");
+ result.put("message", "Router is unreachable.");
+ return result;
+ }
+
+ result.put("success", String.valueOf(answer.getResult()));
+ result.put("message", answer.getDetails());
+
+ return result;
+ }
+
+ protected class UpdateRouterHealthChecksConfigDataTask extends
ManagedContextRunnable {
+ public UpdateRouterHealthChecksConfigDataTask() {
+ }
+
+ @Override
+ protected void runInContext() {
+ try {
+ final List<DomainRouterVO> routers =
_routerDao.listByStateAndManagementServer(VirtualMachine.State.Running,
mgmtSrvrId);
+ s_logger.debug("Found " + routers.size() + " running routers.
");
+
+ for (final DomainRouterVO router : routers) {
+ updateRouterHealthCheckData(router);
+ }
+ } catch (final Exception ex) {
+ s_logger.error("Fail to complete the
UpdateRouterHealthChecksConfigDataTask! ", ex);
+ }
+ }
+ }
+
+ private boolean updateRouterHealthCheckData(DomainRouterVO router) {
+ if (!RouterHealthChecksEnabled.valueIn(router.getDataCenterId())) {
+ return false;
+ }
+
+ String controlIP = getRouterControlIP(router);
+ if (StringUtils.isNotBlank(controlIP) && !controlIP.equals("0.0.0.0"))
{
+ s_logger.info("Updating data for router health checks for router "
+ router.getUuid());
+ final SetMonitorServiceCommand command = new
SetMonitorServiceCommand();
+ command.setAccessDetail(NetworkElementCommand.ROUTER_IP,
getRouterControlIP(router));
+ command.setAccessDetail(NetworkElementCommand.ROUTER_NAME,
router.getInstanceName());
+
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED,
RouterHealthChecksEnabled.valueIn(router.getDataCenterId()).toString());
+
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL,
RouterHealthChecksBasicInterval.value().toString());
+
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL,
RouterHealthChecksAdvancedInterval.value().toString());
+
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED,
RouterHealthChecksToExclude.valueIn(router.getDataCenterId()));
+
command.setAdditionalData(getAdditionalDataForRouterHealthChecks(router));
+ command.setReconfigureAfterUpdate(true);
+
+ Answer origAnswer = null;
+ try {
+ origAnswer = _agentMgr.easySend(router.getHostId(), command);
+ } catch (final Exception e) {
+ s_logger.warn("Error while collecting alerts from router: " +
router.getInstanceName(), e);
+ return false;
+ }
+
+ if (origAnswer == null) {
+ s_logger.warn("Unable to update health checks data to router "
+ router.getHostName());
+ return false;
+ }
+
+ GroupAnswer answer = null;
+ if (origAnswer instanceof GroupAnswer) {
+ answer = (GroupAnswer) origAnswer;
+ } else {
+ s_logger.warn("Unable to update health checks data to router "
+ router.getHostName() + " Received answer " + origAnswer.getDetails());
+ return false;
+ }
+
+ if (!answer.getResult()) {
+ s_logger.warn("Unable to update health checks data to router "
+ router.getHostName() + ", details : " + answer.getDetails());
+ }
+
+ return answer.getResult();
+ }
+ s_logger.debug("Skipping update data on router " + router.getUuid() +
" because controlIp is not correct.");
+ return false;
+ }
+
+ private Map<String, String> getAdditionalDataForRouterHealthChecks(final
DomainRouterVO router) {
Review comment:
still 40 lines ;)
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services