This is an automated email from the ASF dual-hosted git repository.
eolivelli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new f5ddc36 [BK-GC] avoid blocking call in gc-thread
f5ddc36 is described below
commit f5ddc36cc30c5bab5d2e8ebc5fa552c2ad0d6eea
Author: Rajan Dhabalia <[email protected]>
AuthorDate: Tue May 14 23:35:18 2019 -0700
[BK-GC] avoid blocking call in gc-thread
### Motivation
Right now, we have below 3 issues because of which gc thread gets blocked
forever and it can't perform gc-task further. Below issues are mainly related
to blocking call while doing zk-operation without timeout.
bug-fixes:
1. right now, [GC -
ScanAndCompareGarbageCollector](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java#L142)
passes timeout in millisecond to
[LedgerManager](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java#L166)
but it
takes it as second and again try to convert it in millis so, 30Kms timeout
becomes [30M ms
timeout](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java#L245).
Sp, fix timeout unit during gc.
2. Right now, GC makes blocking call to get list of children on ledger
znode and sometime zk-call back doesn't comeback which blocks the gc-thread
forever. However, recently we added the timeout on the
[object-waiting-lock](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java#L243-L248)
which doesn't work because it's in while loop and `object.wait(timeout)`
completes without any exception and GC threads keep running [...]
3. add zk-timeout during delete ledgers in bookie else it can also block
the GC thread.
### Changes
add timeout while bk-gc makes zk-call to verify deleted ledgers.
Reviewers: Enrico Olivelli <[email protected]>, Sijie Guo
<[email protected]>, Rajan Dhabalia <[email protected]>
This closes #1940 from rdhabalia/verify_gc
---
.../bookie/ScanAndCompareGarbageCollector.java | 23 ++++++++++++++++------
.../bookkeeper/meta/CleanupLedgerManager.java | 4 ++--
.../apache/bookkeeper/meta/FlatLedgerManager.java | 4 ++--
.../bookkeeper/meta/HierarchicalLedgerManager.java | 6 +++---
.../org/apache/bookkeeper/meta/LedgerManager.java | 4 ++--
.../meta/LegacyHierarchicalLedgerManager.java | 12 +++++------
.../meta/LongHierarchicalLedgerManager.java | 12 +++++------
.../bookkeeper/meta/MSLedgerManagerFactory.java | 2 +-
.../java/org/apache/bookkeeper/util/ZkUtils.java | 12 ++++++++---
.../apache/bookkeeper/bookie/CompactionTest.java | 2 +-
.../client/ParallelLedgerRecoveryTest.java | 4 ++--
.../apache/bookkeeper/meta/MockLedgerManager.java | 2 +-
.../metadata/etcd/EtcdLedgerManager.java | 2 +-
13 files changed, 53 insertions(+), 36 deletions(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
index 247b610..02e47ae 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
@@ -32,6 +32,9 @@ import java.util.SortedMap;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.meta.LedgerManager;
@@ -139,8 +142,9 @@ public class ScanAndCompareGarbageCollector implements
GarbageCollector {
}
// Iterate over all the ledger on the metadata store
- long zkOpTimeout = this.conf.getZkTimeout() * 2;
- LedgerRangeIterator ledgerRangeIterator =
ledgerManager.getLedgerRanges(zkOpTimeout);
+ long zkOpTimeoutMs = this.conf.getZkTimeout() * 2;
+ LedgerRangeIterator ledgerRangeIterator = ledgerManager
+ .getLedgerRanges(zkOpTimeoutMs);
Set<Long> ledgersInMetadata = null;
long start;
long end = -1;
@@ -167,13 +171,20 @@ public class ScanAndCompareGarbageCollector implements
GarbageCollector {
if (verifyMetadataOnGc) {
int rc = BKException.Code.OK;
try {
-
result(ledgerManager.readLedgerMetadata(bkLid));
- } catch (BKException e) {
- rc = e.getCode();
+
result(ledgerManager.readLedgerMetadata(bkLid), zkOpTimeoutMs,
TimeUnit.MILLISECONDS);
+ } catch (BKException | TimeoutException e) {
+ if (e instanceof BKException) {
+ rc = ((BKException) e).getCode();
+ } else {
+ LOG.warn("Time-out while fetching metadata
for Ledger {} : {}.", bkLid,
+ e.getMessage());
+
+ continue;
+ }
}
if (rc !=
BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
LOG.warn("Ledger {} Missing in metadata list,
but ledgerManager returned rc: {}.",
- bkLid, rc);
+ bkLid, rc);
continue;
}
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
index 3c7ddc5..a3ce432 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
@@ -210,13 +210,13 @@ public class CleanupLedgerManager implements
LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
closeLock.readLock().lock();
try {
if (closed) {
return new ClosedLedgerRangeIterator();
}
- return underlying.getLedgerRanges(zkOpTimeoutSec);
+ return underlying.getLedgerRanges(zkOpTimeoutMs);
} finally {
closeLock.readLock().unlock();
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
index 8c7f05f..7c89326 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
@@ -89,7 +89,7 @@ class FlatLedgerManager extends AbstractZkLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeOutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return new LedgerRangeIterator() {
// single iterator, can visit only one time
boolean nextCalled = false;
@@ -103,7 +103,7 @@ class FlatLedgerManager extends AbstractZkLedgerManager {
try {
zkActiveLedgers = ledgerListToSet(
- ZkUtils.getChildrenInSingleNode(zk,
ledgerRootPath, zkOpTimeOutSec),
+ ZkUtils.getChildrenInSingleNode(zk,
ledgerRootPath, zkOpTimeoutMs),
ledgerRootPath);
nextRange = new LedgerRange(zkActiveLedgers);
} catch (KeeperException.NoNodeException e) {
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
index 079fcfa..9cd6aed 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
@@ -87,9 +87,9 @@ class HierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- LedgerRangeIterator legacyLedgerRangeIterator =
legacyLM.getLedgerRanges(zkOpTimeoutSec);
- LedgerRangeIterator longLedgerRangeIterator =
longLM.getLedgerRanges(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ LedgerRangeIterator legacyLedgerRangeIterator =
legacyLM.getLedgerRanges(zkOpTimeoutMs);
+ LedgerRangeIterator longLedgerRangeIterator =
longLM.getLedgerRanges(zkOpTimeoutMs);
return new HierarchicalLedgerRangeIterator(legacyLedgerRangeIterator,
longLedgerRangeIterator);
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
index 15da14b..56c447d 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
@@ -149,12 +149,12 @@ public interface LedgerManager extends Closeable {
/**
* Loop to scan a range of metadata from metadata storage.
*
- * @param zkOpTimeOutSec
+ * @param zkOpTimeOutMs
* Iterator considers timeout while fetching ledger-range from
* zk.
* @return will return a iterator of the Ranges
*/
- LedgerRangeIterator getLedgerRanges(long zkOpTimeOutSec);
+ LedgerRangeIterator getLedgerRanges(long zkOpTimeOutMs);
/**
* Used to represent the Ledgers range returned from the
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
index 1a63407..9828719 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
@@ -153,8 +153,8 @@ class LegacyHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return new LegacyHierarchicalLedgerRangeIterator(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return new LegacyHierarchicalLedgerRangeIterator(zkOpTimeoutMs);
}
/**
@@ -166,10 +166,10 @@ class LegacyHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager
private String curL1Nodes = "";
private boolean iteratorDone = false;
private LedgerRange nextRange = null;
- private final long zkOpTimeoutSec;
+ private final long zkOpTimeoutMs;
- public LegacyHierarchicalLedgerRangeIterator(long zkOpTimeoutSec) {
- this.zkOpTimeoutSec = zkOpTimeoutSec;
+ public LegacyHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) {
+ this.zkOpTimeoutMs = zkOpTimeoutMs;
}
/**
@@ -266,7 +266,7 @@ class LegacyHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager
String nodePath = nodeBuilder.toString();
List<String> ledgerNodes = null;
try {
- ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath,
zkOpTimeoutSec);
+ ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath,
zkOpTimeoutMs);
} catch (KeeperException.NoNodeException e) {
/* If the node doesn't exist, we must have raced with a
recursive node removal, just
* return an empty list. */
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
index 95f8e48..7ec2f5a 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
@@ -139,8 +139,8 @@ class LongHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return new LongHierarchicalLedgerRangeIterator(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return new LongHierarchicalLedgerRangeIterator(zkOpTimeoutMs);
}
@@ -149,7 +149,7 @@ class LongHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager {
*/
private class LongHierarchicalLedgerRangeIterator implements
LedgerRangeIterator {
LedgerRangeIterator rootIterator;
- final long zkOpTimeoutSec;
+ final long zkOpTimeoutMs;
/**
* Returns all children with path as a parent. If path is
non-existent,
@@ -163,7 +163,7 @@ class LongHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager {
*/
List<String> getChildrenAt(String path) throws IOException {
try {
- List<String> children = ZkUtils.getChildrenInSingleNode(zk,
path, zkOpTimeoutSec);
+ List<String> children = ZkUtils.getChildrenInSingleNode(zk,
path, zkOpTimeoutMs);
Collections.sort(children);
return children;
} catch (KeeperException.NoNodeException e) {
@@ -285,8 +285,8 @@ class LongHierarchicalLedgerManager extends
AbstractHierarchicalLedgerManager {
}
}
- private LongHierarchicalLedgerRangeIterator(long zkOpTimeoutSec) {
- this.zkOpTimeoutSec = zkOpTimeoutSec;
+ private LongHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) {
+ this.zkOpTimeoutMs = zkOpTimeoutMs;
}
private void bootstrap() throws IOException {
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
index 201a0a3..9b21c87 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
@@ -642,7 +642,7 @@ public class MSLedgerManagerFactory extends
AbstractZkLedgerManagerFactory {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return new MSLedgerRangeIterator();
}
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
index 23d0b42..cc0612f 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
@@ -25,7 +25,6 @@ import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.bookkeeper.conf.AbstractConfiguration;
@@ -222,7 +221,7 @@ public class ZkUtils {
* @throws InterruptedException
* @throws IOException
*/
- public static List<String> getChildrenInSingleNode(final ZooKeeper zk,
final String node, long timeOutSec)
+ public static List<String> getChildrenInSingleNode(final ZooKeeper zk,
final String node, long zkOpTimeoutMs)
throws InterruptedException, IOException,
KeeperException.NoNodeException {
final GetChildrenCtx ctx = new GetChildrenCtx();
getChildrenInSingleNode(zk, node, new GenericCallback<List<String>>() {
@@ -240,13 +239,20 @@ public class ZkUtils {
});
synchronized (ctx) {
+ long startTime = System.currentTimeMillis();
while (!ctx.done) {
try {
- ctx.wait(timeOutSec > 0 ?
TimeUnit.SECONDS.toMillis(timeOutSec) : 0);
+ ctx.wait(zkOpTimeoutMs > 0 ? zkOpTimeoutMs : 0);
} catch (InterruptedException e) {
ctx.rc = Code.OPERATIONTIMEOUT.intValue();
ctx.done = true;
}
+ // timeout the process if get-children response not received
+ // zkOpTimeoutMs.
+ if (zkOpTimeoutMs > 0 && (System.currentTimeMillis() -
startTime) >= zkOpTimeoutMs) {
+ ctx.rc = Code.OPERATIONTIMEOUT.intValue();
+ ctx.done = true;
+ }
}
}
if (Code.NONODE.intValue() == ctx.rc) {
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
index 0d6e5d6..adef116 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
@@ -970,7 +970,7 @@ public abstract class CompactionTest extends
BookKeeperClusterTestCase {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long
zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs)
{
final AtomicBoolean hasnext = new AtomicBoolean(true);
return new LedgerManager.LedgerRangeIterator() {
@Override
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
index e6c3c22..177cbe3 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
@@ -112,8 +112,8 @@ public class ParallelLedgerRecoveryTest extends
BookKeeperClusterTestCase {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return lm.getLedgerRanges(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return lm.getLedgerRanges(zkOpTimeoutMs);
}
@Override
diff --git
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
index 0f818b6..952837b 100644
---
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
+++
b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
@@ -188,7 +188,7 @@ public class MockLedgerManager implements LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return null;
}
diff --git
a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
index a40a4bc..f91f2f7 100644
---
a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
+++
b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
@@ -420,7 +420,7 @@ class EtcdLedgerManager implements LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long opTimeOutSec) {
+ public LedgerRangeIterator getLedgerRanges(long opTimeOutMs) {
KeyStream<Long> ks = new KeyStream<>(
kvClient,
ByteSequence.fromString(EtcdUtils.getLedgerKey(scope, 0L)),