This is an automated email from the ASF dual-hosted git repository.
yong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 53954ca Add ensemble check to over-replicated ledger GC (#2813)
53954ca is described below
commit 53954caae3a4f40cf0a45718d306b6a8555c54e2
Author: shustsud <[email protected]>
AuthorDate: Fri Oct 22 11:43:23 2021 +0900
Add ensemble check to over-replicated ledger GC (#2813)
### Motivation
- The issue is as described in
[PR#2797](https://github.com/apache/bookkeeper/pull/2797).
> In one day, zookeepers became high cpu usage and disk full.
> The cause of this is bookie's gc of overreplicated ledgers.
> Gc created/deleted zk nodes under /ledgers/underreplication/locks very
frequently and some bookies ran gc at same time.
> As a result, zookeepers created a lot of snapshots and became disk full.
- I want to reduce the number of lock node creations and deletions in ZK.
### Changes
- Add an ensemble check before creating the lock node.
This is to reduce the number of lock node creations and deletions in ZK.
- ~~If [PR#2797](https://github.com/apache/bookkeeper/pull/2797) was
merged, this PR needs to be fixed.~~
---
.../bookie/ScanAndCompareGarbageCollector.java | 47 +++++++++++++---------
1 file changed, 29 insertions(+), 18 deletions(-)
diff --git
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
index 6f67bae..4c778a1 100644
---
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
+++
b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
@@ -231,8 +231,12 @@ public class ScanAndCompareGarbageCollector implements
GarbageCollector {
for (final Long ledgerId : bkActiveledgers) {
try {
- // check if the ledger is being replicated already by the
replication worker
- if (lum.isLedgerBeingReplicated(ledgerId)) {
+ // check ledger ensembles before creating lock nodes.
+ // this is to reduce the number of lock node creations and
deletions in ZK.
+ // the ensemble check is done again after the lock node is
created.
+ // also, check if the ledger is being replicated already by
the replication worker
+ Versioned<LedgerMetadata> preCheckMetadata =
ledgerManager.readLedgerMetadata(ledgerId).get();
+ if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata) ||
lum.isLedgerBeingReplicated(ledgerId)) {
latch.countDown();
continue;
}
@@ -245,23 +249,12 @@ public class ScanAndCompareGarbageCollector implements
GarbageCollector {
.whenComplete((metadata, exception) -> {
try {
if (exception == null) {
- // do not delete a ledger that is not
closed, since the ensemble might
- // change again and include the current
bookie while we are deleting it
- if (!metadata.getValue().isClosed()) {
- return;
+ if
(isNotBookieIncludedInLedgerEnsembles(metadata)) {
+ // this bookie is not supposed to have
this ledger,
+ // thus we can delete this ledger now
+ overReplicatedLedgers.add(ledgerId);
+ garbageCleaner.clean(ledgerId);
}
- SortedMap<Long, ? extends List<BookieId>>
ensembles =
- metadata.getValue().getAllEnsembles();
- for (List<BookieId> ensemble :
ensembles.values()) {
- // check if this bookie is supposed to
have this ledger
- if
(ensemble.contains(selfBookieAddress)) {
- return;
- }
- }
- // this bookie is not supposed to have
this ledger,
- // thus we can delete this ledger now
- overReplicatedLedgers.add(ledgerId);
- garbageCleaner.clean(ledgerId);
}
} finally {
semaphore.release();
@@ -302,4 +295,22 @@ public class ScanAndCompareGarbageCollector implements
GarbageCollector {
}
}
+ private boolean
isNotBookieIncludedInLedgerEnsembles(Versioned<LedgerMetadata> metadata) {
+ // do not delete a ledger that is not closed, since the ensemble might
+ // change again and include the current bookie while we are deleting it
+ if (!metadata.getValue().isClosed()) {
+ return false;
+ }
+
+ SortedMap<Long, ? extends List<BookieId>> ensembles =
+ metadata.getValue().getAllEnsembles();
+ for (List<BookieId> ensemble : ensembles.values()) {
+ // check if this bookie is supposed to have this ledger
+ if (ensemble.contains(selfBookieAddress)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
}