lhotari commented on code in PR #22560:
URL: https://github.com/apache/pulsar/pull/22560#discussion_r2123039637


##########
managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java:
##########
@@ -2208,42 +2211,149 @@ private void internalReadFromLedger(ReadHandle ledger, 
OpReadEntry opReadEntry)
 
         long lastEntry = min(firstEntry + 
opReadEntry.getNumberOfEntriesToRead() - 1, lastEntryInLedger);
 
-        // Filer out and skip unnecessary read entry
-        if (opReadEntry.skipCondition != null) {
-            long firstValidEntry = -1L;
-            long lastValidEntry = -1L;
-            long entryId = firstEntry;
-            for (; entryId <= lastEntry; entryId++) {
-                if 
(opReadEntry.skipCondition.test(PositionFactory.create(ledger.getId(), 
entryId))) {
-                    if (firstValidEntry != -1L) {
-                        break;
-                    }
-                } else {
-                    if (firstValidEntry == -1L) {
-                        firstValidEntry = entryId;
-                    }
+        Predicate<Position> skipCondition = opReadEntry.skipCondition;
+        if (skipCondition == null) {
+            if (log.isDebugEnabled()) {
+                log.debug("[{}] Reading entries from ledger {} - first={} 
last={}", name, ledger.getId(), firstEntry,
+                        lastEntry);
+            }
+            asyncReadEntry(ledger, firstEntry, lastEntry, opReadEntry, 
opReadEntry.ctx);
+            return;
+        }
 
-                    lastValidEntry = entryId;
-                }
+        // Skip entries that don't match the predicate
+        SortedSet<Long> entryIds = new TreeSet<>();
+        for (long entryId = firstEntry; entryId <= lastEntry; entryId++) {
+            Position position = PositionFactory.create(ledger.getId(), 
entryId);
+            if (skipCondition.test(position)) {
+                continue;
             }
+            entryIds.add(entryId);
+        }
 
-            // If all messages in [firstEntry...lastEntry] are filter out,
-            // then manual call internalReadEntriesComplete to advance read 
position.
-            if (firstValidEntry == -1L) {
-                
opReadEntry.internalReadEntriesComplete(Collections.emptyList(), 
opReadEntry.ctx,
-                        PositionFactory.create(ledger.getId(), lastEntry));
-                return;
+        Position lastReadPosition = PositionFactory.create(ledger.getId(), 
lastEntry);
+        if (entryIds.isEmpty()) {
+            // Move `readPosition` of `cursor`.
+            opReadEntry.internalReadEntriesComplete(Collections.emptyList(), 
opReadEntry.ctx, lastReadPosition);
+            return;
+        }
+
+        List<Pair<Long, Long>> ranges = toRanges(entryIds);
+        ReadEntriesCallback callback = new BatchReadEntriesCallback(entryIds, 
opReadEntry, lastReadPosition);
+        for (Pair<Long, Long> pair : ranges) {
+            long start = pair.getLeft();
+            long end = pair.getRight();
+            asyncReadEntry(ledger, start, end, 
opReadEntry.cursor.isCacheReadEntry(), callback, opReadEntry.ctx);
+        }
+    }
+
+    @VisibleForTesting
+    public static List<Pair<Long, Long>> toRanges(SortedSet<Long> entryIds) {
+        List<Pair<Long, Long>> ranges = new ArrayList<>();
+        long start = entryIds.first();
+        long end = start;
+        for (long entryId : entryIds) {
+            if (entryId - end > 1) {
+                ranges.add(Pair.of(start, end));
+                start = entryId;
+                end = start;
+            } else {
+                end = entryId;
             }
+        }
+        ranges.add(Pair.of(start, end));
+        return ranges;
+    }

Review Comment:
   We could use `it.unimi.dsi.fastutil.longs.LongLongImmutablePair` (implements 
`it.unimi.dsi.fastutil.longs.LongLongPair`) from fastutil to avoid creating 
`java.lang.Long` wrapper instances. No `java.lang.Long` instances are created 
if the `leftLong` and `rightLong` methods are used.
   
   



##########
managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java:
##########
@@ -2208,42 +2211,149 @@ private void internalReadFromLedger(ReadHandle ledger, 
OpReadEntry opReadEntry)
 
         long lastEntry = min(firstEntry + 
opReadEntry.getNumberOfEntriesToRead() - 1, lastEntryInLedger);
 
-        // Filer out and skip unnecessary read entry
-        if (opReadEntry.skipCondition != null) {
-            long firstValidEntry = -1L;
-            long lastValidEntry = -1L;
-            long entryId = firstEntry;
-            for (; entryId <= lastEntry; entryId++) {
-                if 
(opReadEntry.skipCondition.test(PositionFactory.create(ledger.getId(), 
entryId))) {
-                    if (firstValidEntry != -1L) {
-                        break;
-                    }
-                } else {
-                    if (firstValidEntry == -1L) {
-                        firstValidEntry = entryId;
-                    }
+        Predicate<Position> skipCondition = opReadEntry.skipCondition;
+        if (skipCondition == null) {
+            if (log.isDebugEnabled()) {
+                log.debug("[{}] Reading entries from ledger {} - first={} 
last={}", name, ledger.getId(), firstEntry,
+                        lastEntry);
+            }
+            asyncReadEntry(ledger, firstEntry, lastEntry, opReadEntry, 
opReadEntry.ctx);
+            return;
+        }
 
-                    lastValidEntry = entryId;
-                }
+        // Skip entries that don't match the predicate
+        SortedSet<Long> entryIds = new TreeSet<>();

Review Comment:
   We could use `it.unimi.dsi.fastutil.longs.LongSortedSet`, implemented by 
`it.unimi.dsi.fastutil.longs.LongAVLTreeSet`, from `fastutil` here. The benefit 
is that there would be no need to create boxed `java.lang.Long` instances at 
runtime, which would help keep the overhead to a minimum.
   
   Using `fastutil` would require this dependency in `managed-ledger/pom.xml`:
   ```
       <dependency>
         <groupId>it.unimi.dsi</groupId>
         <artifactId>fastutil</artifactId>
       </dependency>
   ```
   We currently already use `fastutil` in multiple Pulsar modules.



##########
managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java:
##########
@@ -2261,6 +2371,21 @@ protected void asyncReadEntry(ReadHandle ledger, 
Position position, ReadEntryCal
         }
     }
 
+    protected void asyncReadEntry(ReadHandle ledger, long firstEntry, long 
lastEntry, boolean shouldCacheEntries,
+                               ReadEntriesCallback callback, Object ctx) {

Review Comment:
   Is there a way to reduce code duplication by making the existing 
`protected void asyncReadEntry(ReadHandle ledger, long firstEntry, long lastEntry, OpReadEntry opReadEntry, Object ctx)` 
method delegate to this method?



##########
managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java:
##########
@@ -2208,42 +2211,149 @@ private void internalReadFromLedger(ReadHandle ledger, 
OpReadEntry opReadEntry)
 
         long lastEntry = min(firstEntry + 
opReadEntry.getNumberOfEntriesToRead() - 1, lastEntryInLedger);
 
-        // Filer out and skip unnecessary read entry
-        if (opReadEntry.skipCondition != null) {
-            long firstValidEntry = -1L;
-            long lastValidEntry = -1L;
-            long entryId = firstEntry;
-            for (; entryId <= lastEntry; entryId++) {
-                if 
(opReadEntry.skipCondition.test(PositionFactory.create(ledger.getId(), 
entryId))) {
-                    if (firstValidEntry != -1L) {
-                        break;
-                    }
-                } else {
-                    if (firstValidEntry == -1L) {
-                        firstValidEntry = entryId;
-                    }
+        Predicate<Position> skipCondition = opReadEntry.skipCondition;
+        if (skipCondition == null) {
+            if (log.isDebugEnabled()) {
+                log.debug("[{}] Reading entries from ledger {} - first={} 
last={}", name, ledger.getId(), firstEntry,
+                        lastEntry);
+            }
+            asyncReadEntry(ledger, firstEntry, lastEntry, opReadEntry, 
opReadEntry.ctx);
+            return;
+        }
 
-                    lastValidEntry = entryId;
-                }
+        // Skip entries that don't match the predicate
+        SortedSet<Long> entryIds = new TreeSet<>();
+        for (long entryId = firstEntry; entryId <= lastEntry; entryId++) {
+            Position position = PositionFactory.create(ledger.getId(), 
entryId);
+            if (skipCondition.test(position)) {
+                continue;
             }
+            entryIds.add(entryId);
+        }
 
-            // If all messages in [firstEntry...lastEntry] are filter out,
-            // then manual call internalReadEntriesComplete to advance read 
position.
-            if (firstValidEntry == -1L) {
-                
opReadEntry.internalReadEntriesComplete(Collections.emptyList(), 
opReadEntry.ctx,
-                        PositionFactory.create(ledger.getId(), lastEntry));
-                return;
+        Position lastReadPosition = PositionFactory.create(ledger.getId(), 
lastEntry);
+        if (entryIds.isEmpty()) {
+            // Move `readPosition` of `cursor`.
+            opReadEntry.internalReadEntriesComplete(Collections.emptyList(), 
opReadEntry.ctx, lastReadPosition);
+            return;
+        }
+
+        List<Pair<Long, Long>> ranges = toRanges(entryIds);
+        ReadEntriesCallback callback = new BatchReadEntriesCallback(entryIds, 
opReadEntry, lastReadPosition);
+        for (Pair<Long, Long> pair : ranges) {
+            long start = pair.getLeft();
+            long end = pair.getRight();
+            asyncReadEntry(ledger, start, end, 
opReadEntry.cursor.isCacheReadEntry(), callback, opReadEntry.ctx);
+        }
+    }
+
+    @VisibleForTesting
+    public static List<Pair<Long, Long>> toRanges(SortedSet<Long> entryIds) {
+        List<Pair<Long, Long>> ranges = new ArrayList<>();
+        long start = entryIds.first();
+        long end = start;
+        for (long entryId : entryIds) {
+            if (entryId - end > 1) {
+                ranges.add(Pair.of(start, end));
+                start = entryId;
+                end = start;
+            } else {
+                end = entryId;
             }
+        }
+        ranges.add(Pair.of(start, end));
+        return ranges;
+    }
 
-            firstEntry = firstValidEntry;
-            lastEntry = lastValidEntry;
+    @VisibleForTesting
+    public static class BatchReadEntriesCallback implements 
ReadEntriesCallback {
+        private final SortedSet<Long> entryIds;
+        private final List<Entry> entries;
+        private final OpReadEntry callback;
+        private volatile boolean completed = false;
+        private final Position lastReadPosition;
+
+        @VisibleForTesting
+        public BatchReadEntriesCallback(SortedSet<Long> entryIdSet, 
OpReadEntry callback,
+                                        Position lastReadPosition) {
+            this.entryIds = entryIdSet;
+            this.entries = new ArrayList<>(entryIdSet.size());
+            this.callback = callback;
+            this.lastReadPosition = lastReadPosition;
         }
 
-        if (log.isDebugEnabled()) {
-            log.debug("[{}] Reading entries from ledger {} - first={} 
last={}", name, ledger.getId(), firstEntry,
-                    lastEntry);
+        @Override
+        public synchronized void readEntriesComplete(List<Entry> entries0, 
Object ctx) {
+            if (completed) {
+                for (Entry entry : entries0) {
+                    entry.release();
+                }
+                return;
+            }
+            entries.addAll(entries0);
+            if (entries.size() < entryIds.size()) {
+                return;
+            }
+            completed = true;
+            // Make sure the entries are in the correct order
+            entries.sort(Comparator.comparingLong(Entry::getEntryId));

Review Comment:
   I guess using `Entry::getPosition` for sorting would be more generic?



##########
managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java:
##########
@@ -2208,42 +2211,149 @@ private void internalReadFromLedger(ReadHandle ledger, 
OpReadEntry opReadEntry)
 
         long lastEntry = min(firstEntry + 
opReadEntry.getNumberOfEntriesToRead() - 1, lastEntryInLedger);
 
-        // Filer out and skip unnecessary read entry
-        if (opReadEntry.skipCondition != null) {
-            long firstValidEntry = -1L;
-            long lastValidEntry = -1L;
-            long entryId = firstEntry;
-            for (; entryId <= lastEntry; entryId++) {
-                if 
(opReadEntry.skipCondition.test(PositionFactory.create(ledger.getId(), 
entryId))) {
-                    if (firstValidEntry != -1L) {
-                        break;
-                    }
-                } else {
-                    if (firstValidEntry == -1L) {
-                        firstValidEntry = entryId;
-                    }
+        Predicate<Position> skipCondition = opReadEntry.skipCondition;
+        if (skipCondition == null) {
+            if (log.isDebugEnabled()) {
+                log.debug("[{}] Reading entries from ledger {} - first={} 
last={}", name, ledger.getId(), firstEntry,
+                        lastEntry);
+            }
+            asyncReadEntry(ledger, firstEntry, lastEntry, opReadEntry, 
opReadEntry.ctx);
+            return;
+        }
 
-                    lastValidEntry = entryId;
-                }
+        // Skip entries that don't match the predicate
+        SortedSet<Long> entryIds = new TreeSet<>();
+        for (long entryId = firstEntry; entryId <= lastEntry; entryId++) {
+            Position position = PositionFactory.create(ledger.getId(), 
entryId);
+            if (skipCondition.test(position)) {
+                continue;
             }

Review Comment:
   A mutable `org.apache.bookkeeper.mledger.Position` implementation could be 
useful here to avoid creating new object instances. It's sufficient to 
implement just `getLedgerId()` and `getEntryId()` to implement `Position`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to