This is an automated email from the ASF dual-hosted git repository. jxue pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/helix.git
commit 4ecd923f279a53554fc66e9a71356af2f125c451 Author: narendly <[email protected]> AuthorDate: Tue Feb 26 17:30:47 2019 -0800 [HELIX-812] HELIX: Fix maintenance history bug There was a bug in maintenance history where when the cluster exits maintenance mode automatically, it would record the exit action twice in history. This is because each pipeline is designed to run MaintenanceRecoveryStage twice. Changelist: 1. Add a flag so that if maintenanceSignal has been changed, just return from MaintenanceRecoveryStage --- .../controller/dataproviders/BaseControllerDataProvider.java | 12 ++++++++++++ .../helix/controller/stages/MaintenanceRecoveryStage.java | 8 ++++++++ .../integration/controller/TestClusterMaintenanceMode.java | 1 - 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java index 8d2a4a5..da91f97 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java @@ -78,6 +78,7 @@ public class BaseControllerDataProvider implements ControlContextProvider { private boolean _updateInstanceOfflineTime = true; private MaintenanceSignal _maintenanceSignal; private boolean _isMaintenanceModeEnabled; + private boolean _hasMaintenanceSignalChanged; private ExecutorService _asyncTasksThreadPool; // A map recording what data has changed @@ -271,6 +272,9 @@ public class BaseControllerDataProvider implements ControlContextProvider { private void updateMaintenanceInfo(final HelixDataAccessor accessor) { _maintenanceSignal = accessor.getProperty(accessor.keyBuilder().maintenance()); _isMaintenanceModeEnabled = _maintenanceSignal != null; + // The following flag is to guarantee that there's only one update per pineline run because we + // check for whether maintenance recovery could happen twice every pipeline + _hasMaintenanceSignalChanged = false; } private void updateIdealRuleMap() { @@ -690,6 +694,14 @@ public class BaseControllerDataProvider implements ControlContextProvider { return _isMaintenanceModeEnabled; } + public boolean hasMaintenanceSignalChanged() { + return _hasMaintenanceSignalChanged; + } + + public void setMaintenanceSignalChanged() { + _hasMaintenanceSignalChanged = true; + } + public MaintenanceSignal getMaintenanceSignal() { return _maintenanceSignal; } diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java index e366ffa..d64459a 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java @@ -52,6 +52,13 @@ public class MaintenanceRecoveryStage extends AbstractAsyncBaseStage { return; } + // Check if the maintenance signal has been changed during this pipeline run + // If true, skip this stage because the Controller already changed the signal + // The flag will be flipped in the next ReadClusterDataStage() + if (cache.hasMaintenanceSignalChanged()) { + return; + } + // Check for the maintenance signal // If it was entered manually or the signal is null (which shouldn't happen), skip this stage MaintenanceSignal maintenanceSignal = cache.getMaintenanceSignal(); @@ -108,6 +115,7 @@ public class MaintenanceRecoveryStage extends AbstractAsyncBaseStage { // MaintenanceSignal. AutoTriggerReason won't be recorded manager.getClusterManagmentTool().autoEnableMaintenanceMode(manager.getClusterName(), false, reason, internalReason); + cache.setMaintenanceSignalChanged(); // Set the flag so we do not double enable/disable LogUtil.logInfo(LOG, _eventId, reason); } } diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java index 806af2f..2e4f440 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java +++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java @@ -343,7 +343,6 @@ public class TestClusterMaintenanceMode extends TaskTestBase { ClusterConfig clusterConfig = _manager.getConfigAccessor().getClusterConfig(CLUSTER_NAME); clusterConfig.setMaxPartitionsPerInstance(-1); _manager.getConfigAccessor().setClusterConfig(CLUSTER_NAME, clusterConfig); - System.out.println("Set clusterconfig"); Thread.sleep(500L);
