SLIDER-743: add the "failed recently" counters and the reset feature to the nodes; the counts are not yet used when choosing placements
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/3105ba9f Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/3105ba9f Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/3105ba9f Branch: refs/heads/develop Commit: 3105ba9f315059c5e957645625666975014ad4e5 Parents: 618cdc4 Author: Steve Loughran <[email protected]> Authored: Wed Jan 28 21:38:51 2015 +0000 Committer: Steve Loughran <[email protected]> Committed: Wed Jan 28 21:38:51 2015 +0000 ---------------------------------------------------------------------- .../slider/server/appmaster/state/AppState.java | 4 ++- .../server/appmaster/state/NodeEntry.java | 26 +++++++++++++++++--- .../server/appmaster/state/NodeInstance.java | 10 ++++++++ .../slider/server/appmaster/state/NodeMap.java | 11 +++++++++ .../server/appmaster/state/RoleHistory.java | 21 +++++++++++++--- 5 files changed, 63 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3105ba9f/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java index 4b37e14..95a7ca5 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java @@ -1711,9 +1711,11 @@ public class AppState { public void resetFailureCounts() { for (RoleStatus roleStatus : getRoleStatusMap().values()) { int failed = roleStatus.resetFailed(); - log.debug("Resetting failure count of {}; was {}", roleStatus.getName(), + log.info("Resetting failure count of {}; was {}", + 
roleStatus.getName(), failed); } + roleHistory.resetFailedRecently(); } /** http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3105ba9f/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java index ebddaf9..c2e203a 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeEntry.java @@ -53,6 +53,11 @@ public class NodeEntry { private int startFailed; private int failed; /** + * Counter of "failed recently" events. These are all failures + * which have happened since it was last reset. + */ + private int failedRecently; + /** * Number of live nodes. */ private int live; @@ -104,7 +109,7 @@ public class NodeEntry { live = v; } - private void incLive() { + private synchronized void incLive() { ++live; } @@ -132,6 +137,7 @@ public class NodeEntry { public synchronized boolean onStartFailed() { decStarting(); ++startFailed; + ++failedRecently; return containerCompleted(false); } @@ -183,6 +189,7 @@ public class NodeEntry { releasing = RoleHistoryUtils.decToFloor(releasing); } else { ++failed; + ++failedRecently; } decLive(); return isAvailable(); @@ -199,7 +206,7 @@ public class NodeEntry { this.lastUsed = lastUsed; } - public int getStartFailed() { + public synchronized int getStartFailed() { return startFailed; } @@ -207,6 +214,17 @@ public class NodeEntry { return failed; } + public synchronized int getFailedRecently() { + return failedRecently; + } + + /** + * Reset the failed recently count. 
+ */ + public void resetFailedRecently() { + failedRecently = 0; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("NodeEntry{"); @@ -214,10 +232,10 @@ public class NodeEntry { sb.append(", requested=").append(requested); sb.append(", starting=").append(starting); sb.append(", live=").append(live); - sb.append(", failed=").append(failed); - sb.append(", startFailed=").append(startFailed); sb.append(", releasing=").append(releasing); sb.append(", lastUsed=").append(lastUsed); + sb.append(", failedRecently=").append(failedRecently); + sb.append(", startFailed=").append(startFailed); sb.append('}'); return sb.toString(); } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3105ba9f/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java index bc79b71..231865e 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java @@ -132,6 +132,16 @@ public class NodeInstance { return active; } + + /** + * run through each entry resetting the failure count + */ + public synchronized void resetFailedRecently() { + for (NodeEntry entry : nodeEntries) { + entry.resetFailedRecently(); + } + } + @Override public String toString() { return hostname; http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3105ba9f/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java 
b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java index 570c194..fe40086 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeMap.java @@ -102,6 +102,17 @@ public class NodeMap extends HashMap<String, NodeInstance> { } return purged; } + + + /** + * reset the failed recently counters + */ + public void resetFailedRecently() { + for (Map.Entry<String, NodeInstance> entry : entrySet()) { + NodeInstance ni = entry.getValue(); + ni.resetFailedRecently(); + } + } /** * Clone point http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/3105ba9f/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java index ce2ab0a..605a4f8 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java @@ -310,6 +310,14 @@ public class RoleHistory { } /** + * reset the failed recently counters + */ + public synchronized void resetFailedRecently() { + log.info("Resetting failure history"); + nodemap.resetFailedRecently(); + } + + /** * Get the path used for history files * @return the directory used for history files */ @@ -480,18 +488,23 @@ public class RoleHistory { } int roleKey = role.getKey(); NodeInstance nodeInstance = null; - + // get the list of possible targets List<NodeInstance> targets = getNodesForRoleId(roleKey); - int cnt = targets == null ? 
0 : targets.size(); + if (targets == null) { + // add an empty list here for ease downstream + targets = new ArrayList<NodeInstance>(0); + } + int cnt = targets.size(); log.debug("There are {} node(s) to consider for {}", cnt, role.getName()); - while (targets != null && !targets.isEmpty() && nodeInstance == null) { + // spin until there's a candidate + while (!targets.isEmpty() && nodeInstance == null) { NodeInstance head = targets.remove(0); if (head.getActiveRoleInstances(roleKey) == 0) { nodeInstance = head; } } if (nodeInstance == null) { - log.debug("No historical node found for {}", role.getName()); + log.info("No historical node found for {}", role.getName()); } return nodeInstance; }
