(paimon) branch master updated: [core] Fix blob-only multi-field data evolution read (#7936)

lzljs3620320 Fri, 22 May 2026 06:22:40 -0700

This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git



The following commit(s) were added to refs/heads/master by this push:
     new 3150128fcc [core] Fix blob-only multi-field data evolution read (#7936)
3150128fcc is described below

commit 3150128fcc1937e22ad35371c936200e5d936b36
Author: YeJunHao <[email protected]>
AuthorDate: Fri May 22 21:21:58 2026 +0800

    [core] Fix blob-only multi-field data evolution read (#7936)
    
    Fix a data evolution read failure when a projection contains only
    multiple blob/vector-store fields and no normal field.
    
    In this case, there is no normal data file in the split, so the expected
    row count remains unknown (`-1`). The special-field bunch should not
    validate its accumulated row count against this sentinel value.
---
 .../apache/paimon/operation/DataEvolutionSplitRead.java   |  8 +++++---
 .../apache/paimon/operation/DataEvolutionReadTest.java    | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
index c2f10b9a2b..0df1b1428e 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
@@ -566,9 +566,11 @@ public class DataEvolutionSplitRead implements SplitRead<InternalRow> {
             }
             files.add(file);
             rowCount += file.rowCount();
-            checkArgument(
-                    rowCount <= expectedRowCount,
-                    "Blob/vector-store files row count exceed the expect " + expectedRowCount);
+            if (expectedRowCount >= 0) {
+                checkArgument(
+                        rowCount <= expectedRowCount,
+                        "Blob/vector-store files row count exceed the expect " + expectedRowCount);
+            }
             this.latestMaxSequenceNumber = file.maxSequenceNumber();
             this.latestFistRowId = file.nonNullFirstRowId();
             this.expectedNextFirstRowId = latestFistRowId + file.rowCount();
diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
index 9bb9a7274e..d6e6998e6c 100644
--- a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
@@ -290,6 +290,21 @@ public class DataEvolutionReadTest {
         assertThat(blobBunch.files.get(3).fileName()).contains("blob18");
     }
 
+    @Test
+    public void testBlobOnlySplitWithMultipleBlobFields() {
+        List<DataFileMeta> files = new ArrayList<>();
+        files.add(createBlobFileWithCols("blob1", 0, 100, 1, Collections.singletonList("blobc1")));
+        files.add(createBlobFileWithCols("blob2", 0, 100, 1, Collections.singletonList("blobc2")));
+
+        List<FieldBunch> fieldBunches =
+                splitFieldBunches(
+                        files, file -> makeBlobRowType(file.writeCols(), String::hashCode));
+
+        assertThat(fieldBunches).hasSize(2);
+        assertThat(fieldBunches.get(0).rowCount()).isEqualTo(100);
+        assertThat(fieldBunches.get(1).rowCount()).isEqualTo(100);
+    }
+
     /** Creates a blob file with the specified parameters. */
     private DataFileMeta createBlobFile(
             String fileName, long firstRowId, long rowCount, long maxSequenceNumber) {

(paimon) branch master updated: [core] Fix blob-only multi-field data evolution read (#7936)

Reply via email to