This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new da99cb2d8d [core] Skip stats from schema in DataEvolution when column type has changed (#7803)
da99cb2d8d is described below
commit da99cb2d8d7bd244702b6339b6395f862c2ecddc
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon May 11 13:25:34 2026 +0530
[core] Skip stats from schema in DataEvolution when column type has changed (#7803)
---
.../operation/DataEvolutionFileStoreScan.java | 10 +++--
.../operation/DataEvolutionFileStoreScanTest.java | 50 ++++++++++++++++++++++
2 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
index 24dc01c47b..514afff296 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
@@ -34,6 +34,7 @@ import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.stats.SimpleStats;
import org.apache.paimon.table.SpecialFields;
import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DataType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.Range;
@@ -257,14 +258,15 @@ public class DataEvolutionFileStoreScan extends AppendOnlyFileStoreScan {
continue;
}
int targetFieldId = allFields[j];
+ DataType targetType = schema.fields().get(j).type();
for (int fieldId : fieldIds) {
if (targetFieldId == fieldId) {
for (int k = 0; k < fieldIdsWithStats.length; k++) {
if (fieldId == fieldIdsWithStats[k]) {
- // TODO: If type not match (e.g. int ->
string), we need to skip
- // this, set rowOffsets[j] = -1 always. (may
-2, after all, set it
- // back to -1) Because schema evolution may
happen to change int to
- // string or something like that.
+ DataType fileType =
dataFileSchemaWithStats.fields().get(k).type();
+ if (!fileType.equalsIgnoreFieldId(targetType))
{
+ continue loop1;
+ }
rowOffsets[j] = i;
fieldOffsets[j] = k;
continue loop1;
diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
index d23f749931..803e4ac255 100644
--- a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
@@ -260,6 +260,56 @@ public class DataEvolutionFileStoreScanTest {
assertThat(nullCounts.isNullAt(2)).isTrue();
}
+ @Test
+ public void testEvolutionStatsSkipsStatsAfterColumnTypeChange() {
+ Schema baseSchema = createSchema("f0", "f1");
+ TableSchema baseTableSchema = TableSchema.create(0L, baseSchema);
+ schemas.put(0L, baseTableSchema);
+
+ Schema evolvedSchema =
+ Schema.newBuilder()
+ .column("f0", DataTypes.STRING())
+ .column("f1", DataTypes.STRING())
+ .build();
+ TableSchema evolvedTableSchema = TableSchema.create(1L, evolvedSchema);
+ schemas.put(1L, evolvedTableSchema);
+
+ ManifestEntry oldTypeEntry =
+ createManifestEntry(
+ 0L,
+ createSimpleStats(
+ GenericRow.of(10,
BinaryString.fromString("a")),
+ GenericRow.of(99,
BinaryString.fromString("z")),
+ createBinaryArray(new int[] {0, 0}),
+ new int[] {0, 1}));
+
+ BinaryRow newTypeMin = new BinaryRow(2);
+ BinaryRowWriter newTypeMinWriter = new BinaryRowWriter(newTypeMin);
+ newTypeMinWriter.writeString(0, BinaryString.fromString("apple"));
+ newTypeMinWriter.writeString(1, BinaryString.fromString("banana"));
+ newTypeMinWriter.complete();
+ BinaryRow newTypeMax = new BinaryRow(2);
+ BinaryRowWriter newTypeMaxWriter = new BinaryRowWriter(newTypeMax);
+ newTypeMaxWriter.writeString(0, BinaryString.fromString("yam"));
+ newTypeMaxWriter.writeString(1, BinaryString.fromString("zebra"));
+ newTypeMaxWriter.complete();
+ SimpleStats newTypeStats =
+ new SimpleStats(newTypeMin, newTypeMax, createBinaryArray(new
int[] {0, 0}));
+ ManifestEntry newTypeEntry = createManifestEntry(1L, newTypeStats);
+
+ EvolutionStats result =
+ DataEvolutionFileStoreScan.evolutionStats(
+ evolvedTableSchema,
+ scanTableSchema,
+ Arrays.asList(oldTypeEntry, newTypeEntry));
+
+ DataEvolutionRow minRow = (DataEvolutionRow) result.minValues();
+ DataEvolutionRow maxRow = (DataEvolutionRow) result.maxValues();
+
+ assertThat(minRow.getString(0).toString()).isEqualTo("apple");
+ assertThat(maxRow.getString(0).toString()).isEqualTo("yam");
+ }
+
@Test
public void testIntersectsRowRanges() {
List<Range> rowRanges =