This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new da99cb2d8d [core] Skip stats from schema in DataEvolution when column type has changed (#7803)
da99cb2d8d is described below

commit da99cb2d8d7bd244702b6339b6395f862c2ecddc
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon May 11 13:25:34 2026 +0530

    [core] Skip stats from schema in DataEvolution when column type has changed (#7803)
---
 .../operation/DataEvolutionFileStoreScan.java      | 10 +++--
 .../operation/DataEvolutionFileStoreScanTest.java  | 50 ++++++++++++++++++++++
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
index 24dc01c47b..514afff296 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/DataEvolutionFileStoreScan.java
@@ -34,6 +34,7 @@ import org.apache.paimon.schema.TableSchema;
 import org.apache.paimon.stats.SimpleStats;
 import org.apache.paimon.table.SpecialFields;
 import org.apache.paimon.types.DataField;
+import org.apache.paimon.types.DataType;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.utils.Pair;
 import org.apache.paimon.utils.Range;
@@ -257,14 +258,15 @@ public class DataEvolutionFileStoreScan extends AppendOnlyFileStoreScan {
                     continue;
                 }
                 int targetFieldId = allFields[j];
+                DataType targetType = schema.fields().get(j).type();
                 for (int fieldId : fieldIds) {
                     if (targetFieldId == fieldId) {
                         for (int k = 0; k < fieldIdsWithStats.length; k++) {
                             if (fieldId == fieldIdsWithStats[k]) {
-                                // TODO: If type not match (e.g. int -> string), we need to skip
-                                // this, set rowOffsets[j] = -1 always. (may -2, after all, set it
-                                // back to -1) Because schema evolution may happen to change int to
-                                // string or something like that.
+                                DataType fileType = dataFileSchemaWithStats.fields().get(k).type();
+                                if (!fileType.equalsIgnoreFieldId(targetType)) {
+                                    continue loop1;
+                                }
                                 rowOffsets[j] = i;
                                 fieldOffsets[j] = k;
                                 continue loop1;
diff --git a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
index d23f749931..803e4ac255 100644
--- a/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/operation/DataEvolutionFileStoreScanTest.java
@@ -260,6 +260,56 @@ public class DataEvolutionFileStoreScanTest {
         assertThat(nullCounts.isNullAt(2)).isTrue();
     }
 
+    @Test
+    public void testEvolutionStatsSkipsStatsAfterColumnTypeChange() {
+        Schema baseSchema = createSchema("f0", "f1");
+        TableSchema baseTableSchema = TableSchema.create(0L, baseSchema);
+        schemas.put(0L, baseTableSchema);
+
+        Schema evolvedSchema =
+                Schema.newBuilder()
+                        .column("f0", DataTypes.STRING())
+                        .column("f1", DataTypes.STRING())
+                        .build();
+        TableSchema evolvedTableSchema = TableSchema.create(1L, evolvedSchema);
+        schemas.put(1L, evolvedTableSchema);
+
+        ManifestEntry oldTypeEntry =
+                createManifestEntry(
+                        0L,
+                        createSimpleStats(
+                                GenericRow.of(10, BinaryString.fromString("a")),
+                                GenericRow.of(99, BinaryString.fromString("z")),
+                                createBinaryArray(new int[] {0, 0}),
+                                new int[] {0, 1}));
+
+        BinaryRow newTypeMin = new BinaryRow(2);
+        BinaryRowWriter newTypeMinWriter = new BinaryRowWriter(newTypeMin);
+        newTypeMinWriter.writeString(0, BinaryString.fromString("apple"));
+        newTypeMinWriter.writeString(1, BinaryString.fromString("banana"));
+        newTypeMinWriter.complete();
+        BinaryRow newTypeMax = new BinaryRow(2);
+        BinaryRowWriter newTypeMaxWriter = new BinaryRowWriter(newTypeMax);
+        newTypeMaxWriter.writeString(0, BinaryString.fromString("yam"));
+        newTypeMaxWriter.writeString(1, BinaryString.fromString("zebra"));
+        newTypeMaxWriter.complete();
+        SimpleStats newTypeStats =
+                new SimpleStats(newTypeMin, newTypeMax, createBinaryArray(new int[] {0, 0}));
+        ManifestEntry newTypeEntry = createManifestEntry(1L, newTypeStats);
+
+        EvolutionStats result =
+                DataEvolutionFileStoreScan.evolutionStats(
+                        evolvedTableSchema,
+                        scanTableSchema,
+                        Arrays.asList(oldTypeEntry, newTypeEntry));
+
+        DataEvolutionRow minRow = (DataEvolutionRow) result.minValues();
+        DataEvolutionRow maxRow = (DataEvolutionRow) result.maxValues();
+
+        assertThat(minRow.getString(0).toString()).isEqualTo("apple");
+        assertThat(maxRow.getString(0).toString()).isEqualTo("yam");
+    }
+
     @Test
     public void testIntersectsRowRanges() {
         List<Range> rowRanges =

Reply via email to