This is an automated email from the ASF dual-hosted git repository.

junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 026676549b [core] Fix parquet nextRowPosition bug (#4636)
026676549b is described below

commit 026676549b9ae5f60f3f13ae1774933dd0705152
Author: aiden.dong <[email protected]>
AuthorDate: Wed Dec 4 19:02:22 2024 +0800

    [core] Fix parquet nextRowPosition bug (#4636)
---
 .../paimon/table/PrimaryKeyFileStoreTableTest.java | 61 ++++++++++++++++++++--
 .../format/parquet/ParquetReaderFactory.java       |  2 +-
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
index e80b49a0f0..fa635e2ab6 100644
--- a/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/table/PrimaryKeyFileStoreTableTest.java
@@ -812,6 +812,27 @@ public class PrimaryKeyFileStoreTableTest extends FileStoreTableTestBase {
 
     @Test
     public void testDeletionVectorsWithParquetFilter() throws Exception {
+        // RowGroup record range [pk] :
+        //
+        // RowGroup-0 :  [0-93421)
+        // RowGroup-1 :  [93421-187794)
+        // RowGroup-2 :  [187794-200000)
+        //
+        // ColumnPage record count :
+        //
+        // col-0 : 300
+        // col-1 : 200
+        // col-2 : 300
+        // col-3 : 300
+        // col-4 : 300
+        // col-5 : 200
+        // col-6 : 100
+        // col-7 : 100
+        // col-8 : 100
+        // col-9 : 100
+        // col-10 : 100
+        // col-11 : 300
+
         FileStoreTable table =
                 createFileStoreTable(
                         conf -> {
@@ -842,7 +863,11 @@ public class PrimaryKeyFileStoreTableTest extends FileStoreTableTestBase {
                         writeBuilder
                                 .newWrite()
                                 .withIOManager(new 
IOManagerImpl(tempDir.toString()));
-        for (int i = 180000; i < 200000; i++) {
+        for (int i = 110000; i < 115000; i++) {
+            write.write(rowDataWithKind(RowKind.DELETE, 1, i, i * 100L));
+        }
+
+        for (int i = 130000; i < 135000; i++) {
             write.write(rowDataWithKind(RowKind.DELETE, 1, i, i * 100L));
         }
 
@@ -854,8 +879,10 @@ public class PrimaryKeyFileStoreTableTest extends FileStoreTableTestBase {
         List<Split> splits = 
toSplits(table.newSnapshotReader().read().dataSplits());
         Random random = new Random();
 
+        // point filter
+
         for (int i = 0; i < 10; i++) {
-            int value = random.nextInt(180000);
+            int value = random.nextInt(110000);
             TableRead read = table.newRead().withFilter(builder.equal(1, 
value)).executeFilter();
             assertThat(getResult(read, splits, BATCH_ROW_TO_STRING))
                     .isEqualTo(
@@ -866,10 +893,38 @@ public class PrimaryKeyFileStoreTableTest extends FileStoreTableTestBase {
         }
 
         for (int i = 0; i < 10; i++) {
-            int value = 180000 + random.nextInt(20000);
+            int value = 130000 + random.nextInt(5000);
             TableRead read = table.newRead().withFilter(builder.equal(1, 
value)).executeFilter();
             assertThat(getResult(read, splits, BATCH_ROW_TO_STRING)).isEmpty();
         }
+
+        TableRead tableRead =
+                table.newRead()
+                        .withFilter(
+                                PredicateBuilder.and(
+                                        builder.greaterOrEqual(1, 100000),
+                                        builder.lessThan(1, 150000)))
+                        .executeFilter();
+
+        List<String> result = getResult(tableRead, splits, 
BATCH_ROW_TO_STRING);
+
+        assertThat(result.size()).isEqualTo(40000); // filter 10000
+
+        assertThat(result)
+                
.doesNotContain("1|110000|11000000|binary|varbinary|mapKey:mapVal|multiset");
+        assertThat(result)
+                
.doesNotContain("1|114999|11499900|binary|varbinary|mapKey:mapVal|multiset");
+        assertThat(result)
+                
.doesNotContain("1|130000|13000000|binary|varbinary|mapKey:mapVal|multiset");
+        assertThat(result)
+                
.doesNotContain("1|134999|13499900|binary|varbinary|mapKey:mapVal|multiset");
+        
assertThat(result).contains("1|100000|10000000|binary|varbinary|mapKey:mapVal|multiset");
+        
assertThat(result).contains("1|149999|14999900|binary|varbinary|mapKey:mapVal|multiset");
+
+        
assertThat(result).contains("1|101099|10109900|binary|varbinary|mapKey:mapVal|multiset");
+        
assertThat(result).contains("1|115000|11500000|binary|varbinary|mapKey:mapVal|multiset");
+        
assertThat(result).contains("1|129999|12999900|binary|varbinary|mapKey:mapVal|multiset");
+        
assertThat(result).contains("1|135000|13500000|binary|varbinary|mapKey:mapVal|multiset");
     }
 
     @Test
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
index 0c99653120..6f8cab2202 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
@@ -491,7 +491,7 @@ public class ParquetReaderFactory implements FormatReaderFactory {
                     nextIndex = 
this.currentRowGroupReadState.currentRangeStart();
                 }
 
-                return nextIndex;
+                return this.currentRowGroupFirstRowIndex + nextIndex;
             }
         }
 

Reply via email to