DaanHoogland commented on a change in pull request #3575: [WIP DO NOT MERGE]
Health check feature for virtual router
URL: https://github.com/apache/cloudstack/pull/3575#discussion_r367913932
##########
File path:
server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java
##########
@@ -1252,39 +1254,60 @@ protected void runInContext() {
for (final DomainRouterVO router : routers) {
GetRouterMonitorResultsAnswer answer =
fetchAndUpdateRouterHealthChecks(router, false);
- String checkFailsToRestartVr =
RouterHealthChecksFailuresToRestartVr.valueIn(router.getDataCenterId());
- if (answer == null) {
- s_logger.warn("Unable to fetch monitor results for
router " + router);
- updateRouterConnectivityHealthCheck(router.getId(),
false, "Communication failed");
- } else if (!answer.getResult()) {
- s_logger.warn("Failed to fetch monitor results from
router " + router + " with details: " + answer.getDetails());
- updateRouterConnectivityHealthCheck(router.getId(),
false, "Failed to fetch results with details: " + answer.getDetails());
- } else {
- updateRouterConnectivityHealthCheck(router.getId(),
true, "Successfully fetched data");
- updateDbHealthChecksFromRouterResponse(router.getId(),
answer.getMonitoringResults());
-
- // Check failing tests and restart if needed
- if (answer.getFailingChecks().size() > 0 &&
StringUtils.isNotBlank(checkFailsToRestartVr)) {
- s_logger.warn("Found failing checks on router " +
router + ". " +
- "Checking failed health checks to see if
router needs reboot");
- for (String failedCheck :
answer.getFailingChecks()) {
-
ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,
- Domain.ROOT_DOMAIN,
EventTypes.EVENT_ROUTER_HEALTH_CHECKS,
- "Router " + router.getUuid() + " has
failing check " + failedCheck);
- if
(checkFailsToRestartVr.contains(failedCheck)) {
- s_logger.warn("Health Check Alert: Found
failing check " + failedCheck + " in " +
-
RouterHealthChecksFailuresToRestartVrCK + ", attempting restart of router.");
- recreateRouter(router.getId());
- }
- }
- }
- }
+ List<String> failingChecks = getFailingChecks(router,
answer);
Review comment:
👍
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services