This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 6fea9b2a18 [core] Introduce BTreeIndexReader.createIterator to 
iterator all records in BTree (#7318)
6fea9b2a18 is described below

commit 6fea9b2a1868a71664ae3a5a026ab62aed008200
Author: Jingsong Lee <[email protected]>
AuthorDate: Sat Feb 28 13:31:18 2026 +0800

    [core] Introduce BTreeIndexReader.createIterator to iterator all records in 
BTree (#7318)
---
 .../paimon/globalindex/btree/BTreeIndexReader.java | 86 +++++++++++++++++++++-
 .../btree/LazyFilteredBTreeIndexReaderTest.java    | 68 +++++++++++++++++
 2 files changed, 153 insertions(+), 1 deletion(-)

diff --git 
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
 
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
index 068cf9993b..c079730541 100644
--- 
a/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
+++ 
b/paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java
@@ -33,8 +33,10 @@ import org.apache.paimon.sst.BlockCache;
 import org.apache.paimon.sst.BlockHandle;
 import org.apache.paimon.sst.BlockIterator;
 import org.apache.paimon.sst.SstFileReader;
+import org.apache.paimon.sst.SstFileReader.SstFileIterator;
 import org.apache.paimon.utils.FileBasedBloomFilter;
 import org.apache.paimon.utils.LazyField;
+import org.apache.paimon.utils.Pair;
 import org.apache.paimon.utils.Preconditions;
 import org.apache.paimon.utils.RoaringNavigableMap64;
 
@@ -42,8 +44,10 @@ import javax.annotation.Nullable;
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.zip.CRC32;
 
@@ -355,6 +359,15 @@ public class BTreeIndexReader implements GlobalIndexReader 
{
                         }));
     }
 
+    /**
+     * Create an iterator to traverse all key-rowId pairs in this index file.
+     *
+     * @return an iterator over all entries
+     */
+    public Iterator<Pair<Object, long[]>> createIterator() {
+        return new BTreeEntryIterator();
+    }
+
     private RoaringNavigableMap64 allNonNullRows() throws IOException {
         // Traverse all data to avoid returning null values, which is very 
advantageous in
         // situations where there are many null values
@@ -376,7 +389,7 @@ public class BTreeIndexReader implements GlobalIndexReader {
      */
     private RoaringNavigableMap64 rangeQuery(
             Object from, Object to, boolean fromInclusive, boolean 
toInclusive) throws IOException {
-        SstFileReader.SstFileIterator fileIter = reader.createIterator();
+        SstFileIterator fileIter = reader.createIterator();
         fileIter.seekTo(keySerializer.serialize(from));
 
         RoaringNavigableMap64 result = new RoaringNavigableMap64();
@@ -414,4 +427,75 @@ public class BTreeIndexReader implements GlobalIndexReader 
{
         }
         return ids;
     }
+
+    /** An iterator to traverse all key-rowId pairs in the BTree index file. */
+    private class BTreeEntryIterator implements Iterator<Pair<Object, long[]>> 
{
+
+        private final SstFileIterator fileIter;
+        private BlockIterator dataIter;
+        private Pair<Object, long[]> nextEntry;
+        private boolean nullOutputted = false;
+
+        private BTreeEntryIterator() {
+            this.fileIter = reader.createIterator();
+            if (minKey != null) {
+                fileIter.seekTo(keySerializer.serialize(minKey));
+            }
+            advance();
+        }
+
+        private void advance() {
+            // First output null values from nullBitmap
+            if (!nullOutputted) {
+                nullOutputted = true;
+                RoaringNavigableMap64 nulls = nullBitmap.get();
+                if (nulls != null && !nulls.isEmpty()) {
+                    long[] nullRowIds = new long[nulls.getIntCardinality()];
+                    int index = 0;
+                    for (Long rowId : nulls) {
+                        nullRowIds[index++] = rowId;
+                    }
+                    nextEntry = Pair.of(null, nullRowIds);
+                    return;
+                }
+            }
+
+            nextEntry = null;
+            while (true) {
+                // try to get next entry from current data block
+                if (dataIter != null && dataIter.hasNext()) {
+                    Map.Entry<MemorySlice, MemorySlice> entry = 
dataIter.next();
+                    Object key = keySerializer.deserialize(entry.getKey());
+                    long[] rowIds = deserializeRowIds(entry.getValue());
+                    nextEntry = Pair.of(key, rowIds);
+                    return;
+                }
+
+                // try to read next data block
+                try {
+                    dataIter = fileIter.readBatch();
+                    if (dataIter == null) {
+                        return; // no more data
+                    }
+                } catch (IOException e) {
+                    throw new RuntimeException("Failed to read next batch from 
SST file", e);
+                }
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            return nextEntry != null;
+        }
+
+        @Override
+        public Pair<Object, long[]> next() {
+            if (nextEntry == null) {
+                throw new NoSuchElementException();
+            }
+            Pair<Object, long[]> result = nextEntry;
+            advance();
+            return result;
+        }
+    }
 }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
 
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
index d3141c58fb..0182706770 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeIndexReaderTest.java
@@ -21,11 +21,21 @@ package org.apache.paimon.globalindex.btree;
 import org.apache.paimon.globalindex.GlobalIndexIOMeta;
 import org.apache.paimon.globalindex.GlobalIndexReader;
 import 
org.apache.paimon.testutils.junit.parameterized.ParameterizedTestExtension;
+import org.apache.paimon.utils.Pair;
 
+import org.junit.jupiter.api.TestTemplate;
 import org.junit.jupiter.api.extension.ExtendWith;
 
 import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import static org.assertj.core.api.Assertions.assertThat;
 
 /** Test for {@link LazyFilteredBTreeReader} to read multiple files. */
 @ExtendWith(ParameterizedTestExtension.class)
@@ -56,4 +66,62 @@ public class LazyFilteredBTreeIndexReaderTest extends 
AbstractIndexReaderTest {
 
         return written;
     }
+
+    @TestTemplate
+    public void testUnorderedIterator() throws Exception {
+        // Set some null values
+        for (int i = dataNum - 1; i >= dataNum * 0.85; i--) {
+            data.get(i).setLeft(null);
+        }
+
+        List<GlobalIndexIOMeta> written = writeData();
+
+        Comparator<Object> nullComparator = Comparator.nullsFirst(comparator);
+
+        // Build expected map from original data
+        Map<Object, Set<Long>> expectedMap = new TreeMap<>(nullComparator);
+        for (Pair<Object, Long> pair : data) {
+            Object key = pair.getKey();
+            Long rowId = pair.getValue();
+
+            if (!expectedMap.containsKey(key)) {
+                expectedMap.put(key, new TreeSet<>());
+            }
+            expectedMap.get(key).add(rowId);
+        }
+
+        Map<Object, Set<Long>> actualMap = new TreeMap<>(nullComparator);
+
+        for (GlobalIndexIOMeta index : written) {
+            try (BTreeIndexReader reader =
+                    new BTreeIndexReader(keySerializer, fileReader, index, 
CACHE_MANAGER)) {
+
+                Iterator<Pair<Object, long[]>> iter = reader.createIterator();
+
+                // Collect all entries from iterator
+                while (iter.hasNext()) {
+                    Pair<Object, long[]> entry = iter.next();
+                    Object key = entry.getLeft();
+                    long[] rowIds = entry.getRight();
+
+                    if (!actualMap.containsKey(key)) {
+                        actualMap.put(key, new TreeSet<>());
+                    }
+                    for (long rowId : rowIds) {
+                        actualMap.get(key).add(rowId);
+                    }
+                }
+            }
+        }
+
+        // Verify all keys are present
+        
assertThat(actualMap.keySet()).containsExactlyElementsOf(expectedMap.keySet());
+
+        // Verify all rowIds for each key
+        for (Object key : expectedMap.keySet()) {
+            assertThat(actualMap.get(key))
+                    .as("RowIds for key: " + key)
+                    .containsExactlyElementsOf(expectedMap.get(key));
+        }
+    }
 }

Reply via email to