Repository: helix Updated Branches: refs/heads/master 196675cd9 -> f011ea3ee
Fixed a bug in WriteLock caused by a read-delete race on a znode. Bug description: T1 currently owns a zk lock, as signified by znode n1. T2 creates a znode n2 and realizes n1 is smaller. T2 is about to register a watcher on n1, but at that same moment T1 releases n1. T2's watcher registration fails, so it breaks out of the while loop and calls wait(). Nobody will ever wake up T2 again. Consequently, all subsequent callers for the same lock are also blocked. Test: Repeated our loadtest and the bug doesn't reappear. For a detailed bug report see this post: http://mail-archives.apache.org/mod_mbox/helix-dev/201605.mbox/%3CCAB-bdySG8Uf6c1fyVHpSu-5pD99VHE=mrL=j3qnkatwaetk...@mail.gmail.com%3E Project: http://git-wip-us.apache.org/repos/asf/helix/repo Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/6ecac13e Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/6ecac13e Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/6ecac13e Branch: refs/heads/master Commit: 6ecac13e42c52f854450c98e33d2e2624d0f6167 Parents: 94e1079 Author: neutronsharc <[email protected]> Authored: Thu May 19 15:29:56 2016 -0700 Committer: neutronsharc <[email protected]> Committed: Thu May 19 15:40:01 2016 -0700 ---------------------------------------------------------------------- .../src/main/java/org/apache/helix/lock/zk/WriteLock.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/helix/blob/6ecac13e/helix-core/src/main/java/org/apache/helix/lock/zk/WriteLock.java ---------------------------------------------------------------------- diff --git a/helix-core/src/main/java/org/apache/helix/lock/zk/WriteLock.java b/helix-core/src/main/java/org/apache/helix/lock/zk/WriteLock.java index aef7618..b842ff8 100644 --- a/helix-core/src/main/java/org/apache/helix/lock/zk/WriteLock.java +++ b/helix-core/src/main/java/org/apache/helix/lock/zk/WriteLock.java @@ -179,7 +179,7 @@ class WriteLock extends 
ProtocolSupport { List<String> names = zookeeper.getChildren(dir, false); for (String name : names) { if (name.startsWith(prefix)) { - id = name; + id = dir + "/" + name; if (LOG.isDebugEnabled()) { LOG.debug("Found id created last time: " + id); } @@ -230,14 +230,15 @@ class WriteLock extends ProtocolSupport { ZNodeName lastChildName = lessThanMe.last(); lastChildId = lastChildName.getName(); if (LOG.isDebugEnabled()) { - LOG.debug("watching less than me node: " + lastChildId); + LOG.debug("watching less than me node: " + lastChildId + ", my id: " + idName.getName()); } Stat stat = zookeeper.exists(lastChildId, new LockWatcher()); if (stat != null) { return Boolean.FALSE; } else { LOG.warn("Could not find the" + " stats for less than me: " - + lastChildName.getName()); + + lastChildName.getName() + ", will retry"); + id = null; } } else { if (isOwner()) {
