This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 3150128fcc [core] Fix blob-only multi-field data evolution read (#7936)
3150128fcc is described below
commit 3150128fcc1937e22ad35371c936200e5d936b36
Author: YeJunHao <[email protected]>
AuthorDate: Fri May 22 21:21:58 2026 +0800
[core] Fix blob-only multi-field data evolution read (#7936)
Fix a data evolution read failure when a projection contains only
multiple blob/vector-store fields and no normal field.
In this case, there is no normal data file in the split, so the expected
row count remains unknown (`-1`). The special-field bunch should not
validate its accumulated row count against this sentinel value.
---
.../apache/paimon/operation/DataEvolutionSplitRead.java | 8 +++++---
.../apache/paimon/operation/DataEvolutionReadTest.java | 15 +++++++++++++++
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
index c2f10b9a2b..0df1b1428e 100644
---
a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
+++
b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionSplitRead.java
@@ -566,9 +566,11 @@ public class DataEvolutionSplitRead implements
SplitRead<InternalRow> {
}
files.add(file);
rowCount += file.rowCount();
- checkArgument(
- rowCount <= expectedRowCount,
- "Blob/vector-store files row count exceed the expect " +
expectedRowCount);
+ if (expectedRowCount >= 0) {
+ checkArgument(
+ rowCount <= expectedRowCount,
+ "Blob/vector-store files row count exceed the expect "
+ expectedRowCount);
+ }
this.latestMaxSequenceNumber = file.maxSequenceNumber();
this.latestFistRowId = file.nonNullFirstRowId();
this.expectedNextFirstRowId = latestFistRowId + file.rowCount();
diff --git
a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
index 9bb9a7274e..d6e6998e6c 100644
---
a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionReadTest.java
@@ -290,6 +290,21 @@ public class DataEvolutionReadTest {
assertThat(blobBunch.files.get(3).fileName()).contains("blob18");
}
+ @Test
+ public void testBlobOnlySplitWithMultipleBlobFields() {
+ List<DataFileMeta> files = new ArrayList<>();
+ files.add(createBlobFileWithCols("blob1", 0, 100, 1,
Collections.singletonList("blobc1")));
+ files.add(createBlobFileWithCols("blob2", 0, 100, 1,
Collections.singletonList("blobc2")));
+
+ List<FieldBunch> fieldBunches =
+ splitFieldBunches(
+ files, file -> makeBlobRowType(file.writeCols(),
String::hashCode));
+
+ assertThat(fieldBunches).hasSize(2);
+ assertThat(fieldBunches.get(0).rowCount()).isEqualTo(100);
+ assertThat(fieldBunches.get(1).rowCount()).isEqualTo(100);
+ }
+
/** Creates a blob file with the specified parameters. */
private DataFileMeta createBlobFile(
String fileName, long firstRowId, long rowCount, long
maxSequenceNumber) {