ivankelly commented on a change in pull request #1205: Algorithm to find start point of compacted ledger
URL: https://github.com/apache/incubator-pulsar/pull/1205#discussion_r167196940
##########
File path: pulsar-broker/src/main/java/org/apache/pulsar/compaction/CompactedTopicImpl.java
##########
@@ -18,10 +18,111 @@
  */
 package org.apache.pulsar.compaction;
 
+import com.google.common.cache.Cache;
+import com.google.common.collect.ComparisonChain;
+
+import java.util.NoSuchElementException;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.bookkeeper.client.BKException;
+import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.bookkeeper.client.LedgerEntry;
 import org.apache.bookkeeper.mledger.Position;
+import org.apache.bookkeeper.mledger.impl.PositionImpl;
+import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap.LongPair;
+import org.apache.pulsar.client.api.RawMessage;
+import org.apache.pulsar.client.impl.RawMessageImpl;
+import org.apache.pulsar.common.api.proto.PulsarApi.MessageIdData;
+
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class CompactedTopicImpl implements CompactedTopic {
+    final static long NEWER_THAN_COMPACTED = -0xfeed0fbaL;
+
     @Override
     public void newCompactedLedger(Position p, long compactedLedgerId) {}
+
+    static CompletableFuture<Long> findStartPoint(LedgerHandle lh, PositionImpl p,
+                                                  Cache<LongPair,MessageIdData> cache) {
+        CompletableFuture<Long> promise = new CompletableFuture<>();
+        findStartPointLoop(lh, p, 0, lh.getLastAddConfirmed(), promise, cache);
+        return promise;
+    }
+
+    private static void findStartPointLoop(LedgerHandle lh, PositionImpl p, long start, long end,
+                                           CompletableFuture<Long> promise,
+                                           Cache<LongPair,MessageIdData> cache) {
+        long midpoint = start + ((end - start) / 2);
+
+        CompletableFuture<MessageIdData> startEntry = readOneMessageId(lh, start, cache);
+        CompletableFuture<MessageIdData> middleEntry = readOneMessageId(lh, midpoint, cache);
+        CompletableFuture<MessageIdData> endEntry = readOneMessageId(lh, end, cache);
+
+        CompletableFuture.allOf(startEntry, middleEntry, endEntry).whenComplete(
+                (v, exception) -> {
+                    if (exception != null) {
+                        promise.completeExceptionally(exception);
+                    }
+                    try {
+                        if (comparePositionAndMessageId(p, startEntry.get()) < 0) {
+                            promise.complete(start);
+                        } else if (comparePositionAndMessageId(p, middleEntry.get()) < 0) {
+                            findStartPointLoop(lh, p, start, midpoint, promise, cache);
+                        } else if (comparePositionAndMessageId(p, endEntry.get()) < 0) {
+                            findStartPointLoop(lh, p, midpoint + 1, end, promise, cache);
+                        } else {
+                            promise.complete(NEWER_THAN_COMPACTED);
+                        }
+                    } catch (InterruptedException ie) {
+                        // should never happen as all should have been completed
+                        Thread.currentThread().interrupt();
+                        log.error("Interrupted waiting on futures which should have completed", ie);
+                    } catch (ExecutionException e) {
+                        // shouldn't happen, allOf should have given us the exception
+                        promise.completeExceptionally(e);
+                    }
+                });
+    }
+
+    private static CompletableFuture<MessageIdData> readOneMessageId(LedgerHandle lh, long entryId,

Review comment:
Ah, I wasn't aware of Caffeine. Changed to use it.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services