This is an automated email from the ASF dual-hosted git repository.

jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 4ecd923f279a53554fc66e9a71356af2f125c451
Author: narendly <[email protected]>
AuthorDate: Tue Feb 26 17:30:47 2019 -0800

    [HELIX-812] HELIX: Fix maintenance history bug
    
    There was a bug in maintenance history where when the cluster exits 
maintenance mode automatically, it would record the exit action twice in 
history. This is because each pipeline is designed to run 
MaintenanceRecoveryStage twice.
        Changelist:
        1. Add a flag so that if maintenanceSignal has been changed, just 
return from MaintenanceRecoveryStage
---
 .../controller/dataproviders/BaseControllerDataProvider.java | 12 ++++++++++++
 .../helix/controller/stages/MaintenanceRecoveryStage.java    |  8 ++++++++
 .../integration/controller/TestClusterMaintenanceMode.java   |  1 -
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java
 
b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java
index 8d2a4a5..da91f97 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java
@@ -78,6 +78,7 @@ public class BaseControllerDataProvider implements 
ControlContextProvider {
   private boolean _updateInstanceOfflineTime = true;
   private MaintenanceSignal _maintenanceSignal;
   private boolean _isMaintenanceModeEnabled;
+  private boolean _hasMaintenanceSignalChanged;
   private ExecutorService _asyncTasksThreadPool;
 
   // A map recording what data has changed
@@ -271,6 +272,9 @@ public class BaseControllerDataProvider implements 
ControlContextProvider {
   private void updateMaintenanceInfo(final HelixDataAccessor accessor) {
     _maintenanceSignal = 
accessor.getProperty(accessor.keyBuilder().maintenance());
     _isMaintenanceModeEnabled = _maintenanceSignal != null;
+    // The following flag is to guarantee that there's only one update per 
pineline run because we
+    // check for whether maintenance recovery could happen twice every pipeline
+    _hasMaintenanceSignalChanged = false;
   }
 
   private void updateIdealRuleMap() {
@@ -690,6 +694,14 @@ public class BaseControllerDataProvider implements 
ControlContextProvider {
     return _isMaintenanceModeEnabled;
   }
 
+  public boolean hasMaintenanceSignalChanged() {
+    return _hasMaintenanceSignalChanged;
+  }
+
+  public void setMaintenanceSignalChanged() {
+    _hasMaintenanceSignalChanged = true;
+  }
+
   public MaintenanceSignal getMaintenanceSignal() {
     return _maintenanceSignal;
   }
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
 
b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
index e366ffa..d64459a 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
@@ -52,6 +52,13 @@ public class MaintenanceRecoveryStage extends 
AbstractAsyncBaseStage {
       return;
     }
 
+    // Check if the maintenance signal has been changed during this pipeline 
run
+    // If true, skip this stage because the Controller already changed the 
signal
+    // The flag will be flipped in the next ReadClusterDataStage()
+    if (cache.hasMaintenanceSignalChanged()) {
+      return;
+    }
+
     // Check for the maintenance signal
     // If it was entered manually or the signal is null (which shouldn't 
happen), skip this stage
     MaintenanceSignal maintenanceSignal = cache.getMaintenanceSignal();
@@ -108,6 +115,7 @@ public class MaintenanceRecoveryStage extends 
AbstractAsyncBaseStage {
       // MaintenanceSignal. AutoTriggerReason won't be recorded
       
manager.getClusterManagmentTool().autoEnableMaintenanceMode(manager.getClusterName(),
 false,
           reason, internalReason);
+      cache.setMaintenanceSignalChanged(); // Set the flag so we do not double 
enable/disable
       LogUtil.logInfo(LOG, _eventId, reason);
     }
   }
diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
index 806af2f..2e4f440 100644
--- 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
+++ 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
@@ -343,7 +343,6 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     ClusterConfig clusterConfig = 
_manager.getConfigAccessor().getClusterConfig(CLUSTER_NAME);
     clusterConfig.setMaxPartitionsPerInstance(-1);
     _manager.getConfigAccessor().setClusterConfig(CLUSTER_NAME, clusterConfig);
-    System.out.println("Set clusterconfig");
 
     Thread.sleep(500L);
 

Reply via email to