This is an automated email from the ASF dual-hosted git repository.

jxue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 4f863c3549631b5e9fccc6f40b513fff6fe435fa
Author: narendly <[email protected]>
AuthorDate: Mon Feb 25 18:09:08 2019 -0800

    [HELIX-801] HELIX: Implement maintenance history for maintenance mode
    
    This diff implements maintenance history for entering and exiting 
maintenance mode.
    Changelist:
    1. Implement a separate DataUpdater for LeaderHistory ZNode update
    2. Implement recording of maintenance history in LeaderHistory ZNode
    3. Fix the bug where only the last few history entries are kept
---
 .../main/java/org/apache/helix/PropertyKey.java    |   6 +-
 .../java/org/apache/helix/PropertyPathBuilder.java |   4 +-
 .../helix/controller/GenericHelixController.java   |   3 +
 .../stages/MaintenanceRecoveryStage.java           |   1 -
 .../manager/zk/DistributedLeaderElection.java      |  29 ++-
 .../org/apache/helix/manager/zk/ZKHelixAdmin.java  |  32 +++-
 .../org/apache/helix/model/ControllerHistory.java  | 212 +++++++++++++++++++++
 .../java/org/apache/helix/model/LeaderHistory.java | 118 ------------
 .../org/apache/helix/model/MaintenanceSignal.java  |   2 +-
 .../controller/TestClusterMaintenanceMode.java     |  98 ++++++++--
 .../controller/TestControllerHistory.java          |  14 +-
 .../server/resources/helix/ClusterAccessor.java    |   4 +-
 12 files changed, 357 insertions(+), 166 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/PropertyKey.java 
b/helix-core/src/main/java/org/apache/helix/PropertyKey.java
index 1ba3108..e16198d 100644
--- a/helix-core/src/main/java/org/apache/helix/PropertyKey.java
+++ b/helix-core/src/main/java/org/apache/helix/PropertyKey.java
@@ -30,7 +30,7 @@ import org.apache.helix.model.HealthStat;
 import org.apache.helix.model.HelixConfigScope.ConfigScopeProperty;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
-import org.apache.helix.model.LeaderHistory;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.LiveInstance;
 import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
@@ -656,11 +656,11 @@ public class PropertyKey {
     }
 
     /**
-     * Get a property key associated with {@link LeaderHistory}
+     * Get a property key associated with {@link ControllerHistory}
      * @return {@link PropertyKey}
      */
     public PropertyKey controllerLeaderHistory() {
-      return new PropertyKey(HISTORY, LeaderHistory.class, _clusterName);
+      return new PropertyKey(HISTORY, ControllerHistory.class, _clusterName);
     }
 
     /**
diff --git a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java 
b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
index e85431c..2bde247 100644
--- a/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
+++ b/helix-core/src/main/java/org/apache/helix/PropertyPathBuilder.java
@@ -29,7 +29,7 @@ import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.ExternalView;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
-import org.apache.helix.model.LeaderHistory;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.LiveInstance;
 import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.Message;
@@ -63,7 +63,7 @@ public class PropertyPathBuilder {
     typeToClassMapping.put(MESSAGES, Message.class);
     typeToClassMapping.put(CURRENTSTATES, CurrentState.class);
     typeToClassMapping.put(STATUSUPDATES, StatusUpdate.class);
-    typeToClassMapping.put(HISTORY, LeaderHistory.class);
+    typeToClassMapping.put(HISTORY, ControllerHistory.class);
     typeToClassMapping.put(PAUSE, PauseSignal.class);
     typeToClassMapping.put(MAINTENANCE, MaintenanceSignal.class);
     // TODO: Below must handle the case for future versions of Task Framework 
with a different path
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
 
b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
index 1ba26f1..dda4552 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/GenericHelixController.java
@@ -304,6 +304,9 @@ public class GenericHelixController implements 
IdealStateChangeListener,
       Pipeline rebalancePipeline = new Pipeline(pipelineName);
       rebalancePipeline.addStage(new BestPossibleStateCalcStage());
       rebalancePipeline.addStage(new IntermediateStateCalcStage());
+      // Need to add MaintenanceRecoveryStage here because 
MAX_PARTITIONS_PER_INSTANCE check could
+      // only occur after IntermediateStateCalcStage calculation
+      rebalancePipeline.addStage(new MaintenanceRecoveryStage());
       rebalancePipeline.addStage(new ResourceMessageGenerationPhase());
       rebalancePipeline.addStage(new MessageSelectionStage());
       rebalancePipeline.addStage(new MessageThrottleStage());
diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
 
b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
index 11d3c92..e366ffa 100644
--- 
a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
+++ 
b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java
@@ -126,7 +126,6 @@ public class MaintenanceRecoveryStage extends 
AbstractAsyncBaseStage {
       // Config is not set; return
       return false;
     }
-
     Map<String, PartitionStateMap> resourceStatesMap =
         intermediateStateOutput.getResourceStatesMap();
     Map<String, Integer> instancePartitionCounts = new HashMap<>();
diff --git 
a/helix-core/src/main/java/org/apache/helix/manager/zk/DistributedLeaderElection.java
 
b/helix-core/src/main/java/org/apache/helix/manager/zk/DistributedLeaderElection.java
index a8fb704..9d9d522 100644
--- 
a/helix-core/src/main/java/org/apache/helix/manager/zk/DistributedLeaderElection.java
+++ 
b/helix-core/src/main/java/org/apache/helix/manager/zk/DistributedLeaderElection.java
@@ -22,6 +22,8 @@ package org.apache.helix.manager.zk;
 import java.lang.management.ManagementFactory;
 import java.util.List;
 
+import org.I0Itec.zkclient.DataUpdater;
+import org.apache.helix.AccessOption;
 import org.apache.helix.HelixDataAccessor;
 import org.apache.helix.HelixException;
 import org.apache.helix.HelixManager;
@@ -30,9 +32,10 @@ import org.apache.helix.InstanceType;
 import org.apache.helix.NotificationContext;
 import org.apache.helix.PropertyKey.Builder;
 import org.apache.helix.PropertyType;
+import org.apache.helix.ZNRecord;
 import org.apache.helix.api.listeners.ControllerChangeListener;
 import org.apache.helix.controller.GenericHelixController;
-import org.apache.helix.model.LeaderHistory;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.LiveInstance;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -156,14 +159,22 @@ public class DistributedLeaderElection implements 
ControllerChangeListener {
   private void updateHistory(HelixManager manager) {
     HelixDataAccessor accessor = manager.getHelixDataAccessor();
     Builder keyBuilder = accessor.keyBuilder();
-
-    LeaderHistory history = 
accessor.getProperty(keyBuilder.controllerLeaderHistory());
-    if (history == null) {
-      history = new LeaderHistory(PropertyType.HISTORY.toString());
-    }
-    history
-        .updateHistory(manager.getClusterName(), manager.getInstanceName(), 
manager.getVersion());
-    if (!accessor.setProperty(keyBuilder.controllerLeaderHistory(), history)) {
+    final String clusterName = manager.getClusterName();
+    final String instanceName = manager.getInstanceName();
+    final String version = manager.getVersion();
+
+    // Record a MaintenanceSignal history
+    if 
(!accessor.getBaseDataAccessor().update(keyBuilder.controllerLeaderHistory().getPath(),
+        new DataUpdater<ZNRecord>() {
+          @Override
+          public ZNRecord update(ZNRecord oldRecord) {
+            if (oldRecord == null) {
+              oldRecord = new ZNRecord(PropertyType.HISTORY.toString());
+            }
+            return new ControllerHistory(oldRecord).updateHistory(clusterName, 
instanceName,
+                version);
+          }
+        }, AccessOption.PERSISTENT)) {
       LOG.error("Failed to persist leader history to ZK!");
     }
   }
diff --git 
a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java 
b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index d388afe..5a1310a 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -60,6 +60,7 @@ import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.model.ClusterConstraints;
 import org.apache.helix.model.ClusterConstraints.ConstraintType;
 import org.apache.helix.model.ConstraintItem;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.CurrentState;
 import org.apache.helix.model.ExternalView;
 import org.apache.helix.model.HelixConfigScope;
@@ -417,9 +418,9 @@ public class ZKHelixAdmin implements HelixAdmin {
    * @param customFields
    * @param triggeringEntity
    */
-  private void processMaintenanceMode(String clusterName, boolean enabled, 
String reason,
-      MaintenanceSignal.AutoTriggerReason internalReason, Map<String, String> 
customFields,
-      MaintenanceSignal.TriggeringEntity triggeringEntity) {
+  private void processMaintenanceMode(String clusterName, final boolean 
enabled, final String reason,
+      final MaintenanceSignal.AutoTriggerReason internalReason, final 
Map<String, String> customFields,
+      final MaintenanceSignal.TriggeringEntity triggeringEntity) {
     HelixDataAccessor accessor =
         new ZKHelixDataAccessor(clusterName, new 
ZkBaseDataAccessor<ZNRecord>(_zkClient));
     Builder keyBuilder = accessor.keyBuilder();
@@ -427,6 +428,7 @@ public class ZKHelixAdmin implements HelixAdmin {
         triggeringEntity == MaintenanceSignal.TriggeringEntity.CONTROLLER ? 
"automatically"
             : "manually",
         enabled ? "enters" : "exits", reason == null ? "NULL" : reason);
+    final long currentTime = System.currentTimeMillis();
     if (!enabled) {
       // Exit maintenance mode
       accessor.removeProperty(keyBuilder.maintenance());
@@ -436,7 +438,7 @@ public class ZKHelixAdmin implements HelixAdmin {
       if (reason != null) {
         maintenanceSignal.setReason(reason);
       }
-      maintenanceSignal.setTimestamp(System.currentTimeMillis());
+      maintenanceSignal.setTimestamp(currentTime);
       maintenanceSignal.setTriggeringEntity(triggeringEntity);
       switch (triggeringEntity) {
       case CONTROLLER:
@@ -458,9 +460,29 @@ public class ZKHelixAdmin implements HelixAdmin {
         break;
       }
       if (!accessor.createMaintenance(maintenanceSignal)) {
-        throw new HelixException("Failed to create maintenance signal");
+        throw new HelixException("Failed to create maintenance signal!");
       }
     }
+
+    // Record a MaintenanceSignal history
+    if 
(!accessor.getBaseDataAccessor().update(keyBuilder.controllerLeaderHistory().getPath(),
+        new DataUpdater<ZNRecord>() {
+          @Override
+          public ZNRecord update(ZNRecord oldRecord) {
+            try {
+              if (oldRecord == null) {
+                oldRecord = new ZNRecord(PropertyType.HISTORY.toString());
+              }
+              return new 
ControllerHistory(oldRecord).updateMaintenanceHistory(enabled, reason,
+                  currentTime, internalReason, customFields, triggeringEntity);
+            } catch (IOException e) {
+              logger.error("Failed to update maintenance history! Exception: 
{}", e);
+              return oldRecord;
+            }
+          }
+        }, AccessOption.PERSISTENT)) {
+      logger.error("Failed to write maintenance history to ZK!");
+    }
   }
 
   @Override
diff --git 
a/helix-core/src/main/java/org/apache/helix/model/ControllerHistory.java 
b/helix-core/src/main/java/org/apache/helix/model/ControllerHistory.java
new file mode 100644
index 0000000..4d87ffc
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/model/ControllerHistory.java
@@ -0,0 +1,212 @@
+package org.apache.helix.model;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.helix.HelixProperty;
+import org.apache.helix.ZNRecord;
+import org.codehaus.jackson.map.ObjectMapper;
+
+/**
+ * The history of instances that have served as the leader controller
+ */
+public class ControllerHistory extends HelixProperty {
+  private final static int HISTORY_SIZE = 10;
+  private final static int MAINTENANCE_HISTORY_SIZE = 20;
+
+  private enum ConfigProperty {
+    HISTORY,
+    TIME,
+    DATE,
+    VERSION,
+    CONTROLLER
+  }
+
+  private enum MaintenanceConfigKey {
+    MAINTENANCE_HISTORY,
+    OPERATION_TYPE,
+    DATE,
+    REASON
+
+  }
+
+  private enum OperationType {
+    // The following are options for OPERATION_TYPE in MaintenanceConfigKey
+    ENTER,
+    EXIT
+  }
+
+  public enum HistoryUpdaterType {
+    CONTROLLER_LEADERSHIP,
+    MAINTENANCE
+  }
+
+  public ControllerHistory(String id) {
+    super(id);
+  }
+
+  public ControllerHistory(ZNRecord record) {
+    super(record);
+  }
+
+  /**
+   * Save up to HISTORY_SIZE number of leaders in FIFO order
+   * @param clusterName the cluster the instance leads
+   * @param instanceName the name of the leader instance
+   */
+  public ZNRecord updateHistory(String clusterName, String instanceName, 
String version) {
+    /* keep this for back-compatible */
+    // TODO: remove this in future when we confirmed no one consumes it
+    List<String> list = _record.getListField(clusterName);
+    if (list == null) {
+      list = new ArrayList<>();
+      _record.setListField(clusterName, list);
+    }
+
+    while (list.size() >= HISTORY_SIZE) {
+      list.remove(0);
+    }
+    list.add(instanceName);
+    // TODO: remove above in future when we confirmed no one consumes it */
+
+    List<String> historyList = 
_record.getListField(ConfigProperty.HISTORY.name());
+    if (historyList == null) {
+      historyList = new ArrayList<>();
+      _record.setListField(ConfigProperty.HISTORY.name(), historyList);
+    }
+
+    // Keep only the last HISTORY_SIZE entries
+    while (historyList.size() >= HISTORY_SIZE) {
+      historyList.remove(0);
+    }
+
+    Map<String, String> historyEntry = new HashMap<>();
+
+    long currentTime = System.currentTimeMillis();
+    DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");
+    df.setTimeZone(TimeZone.getTimeZone("UTC"));
+    String dateTime = df.format(new Date(currentTime));
+
+    historyEntry.put(ConfigProperty.CONTROLLER.name(), instanceName);
+    historyEntry.put(ConfigProperty.TIME.name(), String.valueOf(currentTime));
+    historyEntry.put(ConfigProperty.DATE.name(), dateTime);
+    historyEntry.put(ConfigProperty.VERSION.name(), version);
+
+    historyList.add(historyEntry.toString());
+    return _record;
+  }
+
+  /**
+   * Get history list
+   * @return
+   */
+  public List<String> getHistoryList() {
+    List<String> historyList = 
_record.getListField(ConfigProperty.HISTORY.name());
+    if (historyList == null) {
+      historyList = new ArrayList<>();
+    }
+
+    return historyList;
+  }
+
+  /**
+   * Record up to MAINTENANCE_HISTORY_SIZE number of changes to 
MaintenanceSignal in FIFO order.
+   * @param enabled
+   * @param reason
+   * @param currentTime
+   * @param internalReason
+   * @param customFields
+   * @param triggeringEntity
+   */
+  public ZNRecord updateMaintenanceHistory(boolean enabled, String reason, 
long currentTime,
+      MaintenanceSignal.AutoTriggerReason internalReason, Map<String, String> 
customFields,
+      MaintenanceSignal.TriggeringEntity triggeringEntity) throws IOException {
+    List<String> maintenanceHistoryList =
+        _record.getListField(MaintenanceConfigKey.MAINTENANCE_HISTORY.name());
+    if (maintenanceHistoryList == null) {
+      maintenanceHistoryList = new ArrayList<>();
+      _record.setListField(MaintenanceConfigKey.MAINTENANCE_HISTORY.name(), 
maintenanceHistoryList);
+    }
+
+    // Keep only the last MAINTENANCE_HISTORY_SIZE entries
+    while (maintenanceHistoryList.size() >= MAINTENANCE_HISTORY_SIZE) {
+      maintenanceHistoryList.remove(0);
+    }
+
+    DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH:" + "mm:ss");
+    df.setTimeZone(TimeZone.getTimeZone("UTC"));
+    String dateTime = df.format(new Date(currentTime));
+
+    // Populate maintenanceEntry
+    Map<String, String> maintenanceEntry = new HashMap<>();
+    maintenanceEntry.put(MaintenanceConfigKey.OPERATION_TYPE.name(),
+        enabled ? OperationType.ENTER.name() : OperationType.EXIT.name());
+    maintenanceEntry.put(MaintenanceConfigKey.REASON.name(), reason);
+    maintenanceEntry.put(MaintenanceConfigKey.DATE.name(), dateTime);
+    
maintenanceEntry.put(MaintenanceSignal.MaintenanceSignalProperty.TIMESTAMP.name(),
+        String.valueOf(currentTime));
+    
maintenanceEntry.put(MaintenanceSignal.MaintenanceSignalProperty.TRIGGERED_BY.name(),
+        triggeringEntity.name());
+    if (triggeringEntity == MaintenanceSignal.TriggeringEntity.CONTROLLER) {
+      // If auto-triggered
+      
maintenanceEntry.put(MaintenanceSignal.MaintenanceSignalProperty.AUTO_TRIGGER_REASON.name(),
+          internalReason.name());
+    } else {
+      // If manually triggered
+      if (customFields != null && !customFields.isEmpty()) {
+        for (Map.Entry<String, String> customFieldEntry : 
customFields.entrySet()) {
+          if (!maintenanceEntry.containsKey(customFieldEntry.getKey())) {
+            // Make sure custom entries do not overwrite pre-defined fields
+            maintenanceEntry.put(customFieldEntry.getKey(), 
customFieldEntry.getValue());
+          }
+        }
+      }
+    }
+    maintenanceHistoryList.add(new 
ObjectMapper().writeValueAsString(maintenanceEntry));
+    return _record;
+  }
+
+  /**
+   * Get maintenance history list.
+   * @return
+   */
+  public List<String> getMaintenanceHistoryList() {
+    List<String> maintenanceHistoryList =
+        _record.getListField(MaintenanceConfigKey.MAINTENANCE_HISTORY.name());
+    if (maintenanceHistoryList == null) {
+      maintenanceHistoryList = new ArrayList<>();
+    }
+    return maintenanceHistoryList;
+  }
+
+  @Override
+  public boolean isValid() {
+    return true;
+  }
+}
diff --git a/helix-core/src/main/java/org/apache/helix/model/LeaderHistory.java 
b/helix-core/src/main/java/org/apache/helix/model/LeaderHistory.java
deleted file mode 100644
index 31d125f..0000000
--- a/helix-core/src/main/java/org/apache/helix/model/LeaderHistory.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package org.apache.helix.model;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-import org.apache.helix.HelixProperty;
-import org.apache.helix.ZNRecord;
-
-/**
- * The history of instances that have served as the leader controller
- */
-public class LeaderHistory extends HelixProperty {
-  private final static int HISTORY_SIZE = 10;
-
-  private enum ConfigProperty {
-    HISTORY,
-    TIME,
-    DATE,
-    VERSION,
-    CONTROLLER
-  }
-
-  public LeaderHistory(String id) {
-    super(id);
-  }
-
-  public LeaderHistory(ZNRecord record) {
-    super(record);
-  }
-
-  /**
-   * Save up to HISTORY_SIZE number of leaders in FIFO order
-   * @param clusterName the cluster the instance leads
-   * @param instanceName the name of the leader instance
-   */
-  public void updateHistory(String clusterName, String instanceName, String 
version) {
-    /* keep this for back-compatible */
-    // TODO: remove this in future when we confirmed no one consumes it
-    List<String> list = _record.getListField(clusterName);
-    if (list == null) {
-      list = new ArrayList<String>();
-      _record.setListField(clusterName, list);
-    }
-
-    if (list.size() == HISTORY_SIZE) {
-      list.remove(0);
-    }
-    list.add(instanceName);
-    // TODO: remove above in future when we confirmed no one consumes it */
-
-    List<String> historyList = 
_record.getListField(ConfigProperty.HISTORY.name());
-    if (historyList == null) {
-      historyList = new ArrayList<String>();
-      _record.setListField(ConfigProperty.HISTORY.name(), historyList);
-    }
-
-    if (historyList.size() == HISTORY_SIZE) {
-      historyList.remove(0);
-    }
-
-    Map<String, String> historyEntry = new HashMap<String, String>();
-
-    long currentTime = System.currentTimeMillis();
-    DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");
-    df.setTimeZone(TimeZone.getTimeZone("UTC"));
-    String dateTime = df.format(new Date(currentTime));
-
-    historyEntry.put(ConfigProperty.CONTROLLER.name(), instanceName);
-    historyEntry.put(ConfigProperty.TIME.name(), String.valueOf(currentTime));
-    historyEntry.put(ConfigProperty.DATE.name(), dateTime);
-    historyEntry.put(ConfigProperty.VERSION.name(), version);
-
-    historyList.add(historyEntry.toString());
-  }
-
-  /**
-   * Get history list
-   * @return
-   */
-  public List<String> getHistoryList() {
-    List<String> historyList = 
_record.getListField(ConfigProperty.HISTORY.name());
-    if (historyList == null) {
-      historyList = new ArrayList<String>();
-    }
-
-    return historyList;
-  }
-
-  @Override
-  public boolean isValid() {
-    return true;
-  }
-}
diff --git 
a/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java 
b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
index 41cbd0e..511e6f3 100644
--- a/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
+++ b/helix-core/src/main/java/org/apache/helix/model/MaintenanceSignal.java
@@ -29,7 +29,7 @@ public class MaintenanceSignal extends PauseSignal {
   /**
    * Pre-defined fields set by Helix Controller only.
    */
-  private enum MaintenanceSignalProperty {
+  public enum MaintenanceSignalProperty {
     TRIGGERED_BY,
     TIMESTAMP,
     AUTO_TRIGGER_REASON
diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
index 510cfe1..ccf7c12 100644
--- 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
+++ 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestClusterMaintenanceMode.java
@@ -20,14 +20,21 @@ package org.apache.helix.integration.controller;
  */
 
 import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
+import java.util.HashMap;
 import java.util.Map;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.PropertyKey;
 import org.apache.helix.integration.manager.MockParticipantManager;
 import org.apache.helix.integration.task.TaskTestBase;
 import org.apache.helix.integration.task.WorkflowGenerator;
 import org.apache.helix.model.ClusterConfig;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.ExternalView;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.MaintenanceSignal;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.type.TypeFactory;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
@@ -37,6 +44,8 @@ public class TestClusterMaintenanceMode extends TaskTestBase {
   private MockParticipantManager _newInstance;
   private String newResourceAddedDuringMaintenanceMode =
       String.format("%s_%s", WorkflowGenerator.DEFAULT_TGT_DB, 1);
+  private HelixDataAccessor _dataAccessor;
+  private PropertyKey.Builder _keyBuilder;
 
   @BeforeClass
   public void beforeClass() throws Exception {
@@ -45,6 +54,8 @@ public class TestClusterMaintenanceMode extends TaskTestBase {
     _numReplicas = 3;
     _numPartitions = 5;
     super.beforeClass();
+    _dataAccessor = _manager.getHelixDataAccessor();
+    _keyBuilder = _dataAccessor.keyBuilder();
   }
 
   @AfterClass
@@ -143,8 +154,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that the cluster is in maintenance
-    MaintenanceSignal maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    MaintenanceSignal maintenanceSignal = 
_dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
 
     // Now bring up 2 instances
@@ -156,8 +166,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that the cluster is no longer in maintenance (auto-recovered)
-    maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    maintenanceSignal = _dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNull(maintenanceSignal);
   }
 
@@ -181,8 +190,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // The cluster should still be in maintenance because it was enabled 
manually
-    MaintenanceSignal maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    MaintenanceSignal maintenanceSignal = 
_dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
   }
 
@@ -202,8 +210,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that maintenance signal was triggered by Controller
-    MaintenanceSignal maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    MaintenanceSignal maintenanceSignal = 
_dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
     Assert.assertEquals(maintenanceSignal.getTriggeringEntity(),
         MaintenanceSignal.TriggeringEntity.CONTROLLER);
@@ -216,8 +223,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that maintenance mode has successfully overwritten with the right 
TRIGGERED_BY field
-    maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    maintenanceSignal = _dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertEquals(maintenanceSignal.getTriggeringEntity(),
         MaintenanceSignal.TriggeringEntity.USER);
     for (Map.Entry<String, String> entry : customFields.entrySet()) {
@@ -244,8 +250,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
 
     // Since 3 instances are missing, the cluster should have gone back under 
maintenance
     // automatically
-    MaintenanceSignal maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    MaintenanceSignal maintenanceSignal = 
_dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
     Assert.assertEquals(maintenanceSignal.getTriggeringEntity(),
         MaintenanceSignal.TriggeringEntity.CONTROLLER);
@@ -261,8 +266,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that the cluster exited maintenance
-    maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    maintenanceSignal = _dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNull(maintenanceSignal);
 
     // Kill 3 instances, which would put cluster in maintenance automatically
@@ -272,8 +276,7 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
     Thread.sleep(500L);
 
     // Check that cluster is back under maintenance
-    maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    maintenanceSignal = _dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
     Assert.assertEquals(maintenanceSignal.getTriggeringEntity(),
         MaintenanceSignal.TriggeringEntity.CONTROLLER);
@@ -298,12 +301,71 @@ public class TestClusterMaintenanceMode extends 
TaskTestBase {
 
     // Check that the cluster is still in maintenance (should not have 
auto-exited because it would
     // fail the MaxPartitionsPerInstance check)
-    maintenanceSignal = _manager.getHelixDataAccessor()
-        
.getProperty(_manager.getHelixDataAccessor().keyBuilder().maintenance());
+    maintenanceSignal = _dataAccessor.getProperty(_keyBuilder.maintenance());
     Assert.assertNotNull(maintenanceSignal);
     Assert.assertEquals(maintenanceSignal.getTriggeringEntity(),
         MaintenanceSignal.TriggeringEntity.CONTROLLER);
     Assert.assertEquals(maintenanceSignal.getAutoTriggerReason(),
         
MaintenanceSignal.AutoTriggerReason.MAX_PARTITION_PER_INSTANCE_EXCEEDED);
   }
+
+  /**
+   * Test that the Controller correctly records maintenance history in various 
situations.
+   * @throws InterruptedException
+   */
+  @Test(dependsOnMethods = "testMaxPartitionLimit")
+  public void testMaintenanceHistory() throws InterruptedException, 
IOException {
+    // In maintenance mode, by controller, for 
MAX_PARTITION_PER_INSTANCE_EXCEEDED
+    ControllerHistory history = 
_dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
+    Map<String, String> lastHistoryEntry = convertStringToMap(
+        
history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size()
 - 1));
+
+    // **The KV pairs are hard-coded in here for the ease of reading!**
+    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "ENTER");
+    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "CONTROLLER");
+    Assert.assertEquals(lastHistoryEntry.get("AUTO_TRIGGER_REASON"),
+        "MAX_PARTITION_PER_INSTANCE_EXCEEDED");
+
+    // Remove the maxPartitionPerInstance config
+    ClusterConfig clusterConfig = 
_manager.getConfigAccessor().getClusterConfig(CLUSTER_NAME);
+    clusterConfig.setMaxPartitionsPerInstance(-1);
+    _manager.getConfigAccessor().setClusterConfig(CLUSTER_NAME, clusterConfig);
+    System.out.println("Set clusterconfig");
+
+    Thread.sleep(500L);
+
+    // Now check that the cluster exited maintenance
+    // EXIT, CONTROLLER, for MAX_PARTITION_PER_INSTANCE_EXCEEDED
+    history = _dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
+    lastHistoryEntry = convertStringToMap(
+        
history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size()
 - 1));
+    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "EXIT");
+    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "CONTROLLER");
+    Assert.assertEquals(lastHistoryEntry.get("AUTO_TRIGGER_REASON"),
+        "MAX_PARTITION_PER_INSTANCE_EXCEEDED");
+
+    // Manually put the cluster in maintenance with a custom field
+    Map<String, String> customFieldMap = ImmutableMap.of("k1", "v1", "k2", 
"v2");
+    
_gSetupTool.getClusterManagementTool().manuallyEnableMaintenanceMode(CLUSTER_NAME,
 true, "TEST",
+        customFieldMap);
+    Thread.sleep(500L);
+    // ENTER, USER, for reason TEST, no internalReason
+    history = _dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
+    lastHistoryEntry = convertStringToMap(
+        
history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size()
 - 1));
+    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "ENTER");
+    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "USER");
+    Assert.assertEquals(lastHistoryEntry.get("REASON"), "TEST");
+    Assert.assertNull(lastHistoryEntry.get("AUTO_TRIGGER_REASON"));
+  }
+
+  /**
+   * Convert a String representation of a Map into a Map object for 
verification purposes.
+   * @param value
+   * @return
+   */
+  private static Map<String, String> convertStringToMap(String value) throws 
IOException {
+    return new ObjectMapper().readValue(value,
+        TypeFactory.mapType(HashMap.class, String.class, String.class));
+  }
 }
diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerHistory.java
 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerHistory.java
index 8a20b22..a01afcd 100644
--- 
a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerHistory.java
+++ 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerHistory.java
@@ -26,7 +26,7 @@ import org.apache.helix.InstanceType;
 import org.apache.helix.PropertyKey;
 import org.apache.helix.integration.common.ZkStandAloneCMTestBase;
 import org.apache.helix.integration.manager.ClusterControllerManager;
-import org.apache.helix.model.LeaderHistory;
+import org.apache.helix.model.ControllerHistory;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
@@ -40,9 +40,9 @@ public class TestControllerHistory extends 
ZkStandAloneCMTestBase {
 
     PropertyKey.Builder keyBuilder = new PropertyKey.Builder(CLUSTER_NAME);
     PropertyKey propertyKey = keyBuilder.controllerLeaderHistory();
-    LeaderHistory leaderHistory = 
manager.getHelixDataAccessor().getProperty(propertyKey);
-    Assert.assertNotNull(leaderHistory);
-    List<String> list = leaderHistory.getRecord().getListField("HISTORY");
+    ControllerHistory controllerHistory = 
manager.getHelixDataAccessor().getProperty(propertyKey);
+    Assert.assertNotNull(controllerHistory);
+    List<String> list = controllerHistory.getRecord().getListField("HISTORY");
     Assert.assertEquals(list.size(), 1);
 
     for (int i = 0; i <= 12; i++) {
@@ -51,9 +51,9 @@ public class TestControllerHistory extends 
ZkStandAloneCMTestBase {
       _controller.syncStart();
     }
 
-    leaderHistory = manager.getHelixDataAccessor().getProperty(propertyKey);
-    Assert.assertNotNull(leaderHistory);
-    list = leaderHistory.getRecord().getListField("HISTORY");
+    controllerHistory = 
manager.getHelixDataAccessor().getProperty(propertyKey);
+    Assert.assertNotNull(controllerHistory);
+    list = controllerHistory.getRecord().getListField("HISTORY");
     Assert.assertEquals(list.size(), 10);
     manager.disconnect();
   }
diff --git 
a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/ClusterAccessor.java
 
b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/ClusterAccessor.java
index 3892fc6..e901d9c 100644
--- 
a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/ClusterAccessor.java
+++ 
b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/ClusterAccessor.java
@@ -43,7 +43,7 @@ import org.apache.helix.manager.zk.ZKUtil;
 import org.apache.helix.manager.zk.client.HelixZkClient;
 import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.model.HelixConfigScope;
-import org.apache.helix.model.LeaderHistory;
+import org.apache.helix.model.ControllerHistory;
 import org.apache.helix.model.LiveInstance;
 import org.apache.helix.model.Message;
 import org.apache.helix.model.StateModelDefinition;
@@ -329,7 +329,7 @@ public class ClusterAccessor extends AbstractHelixResource {
     Map<String, Object> controllerHistory = new HashMap<>();
     controllerHistory.put(Properties.id.name(), clusterId);
 
-    LeaderHistory history =
+    ControllerHistory history =
         
dataAccessor.getProperty(dataAccessor.keyBuilder().controllerLeaderHistory());
     if (history != null) {
       controllerHistory.put(Properties.history.name(), 
history.getHistoryList());

Reply via email to