This is an automated email from the ASF dual-hosted git repository.

leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 6a658f6  feat: add simple stats core utilities (#68)
6a658f6 is described below

commit 6a658f6c46318d53a40ae5e27fe939858bb22640
Author: Yonghao Fang <[email protected]>
AuthorDate: Wed Jun 10 10:40:21 2026 +0800

    feat: add simple stats core utilities (#68)
---
 src/paimon/core/stats/simple_stats.cpp             | 116 +++++
 src/paimon/core/stats/simple_stats.h               |  91 ++++
 src/paimon/core/stats/simple_stats_collector.cpp   | 213 +++++++++
 src/paimon/core/stats/simple_stats_collector.h     |  60 +++
 .../core/stats/simple_stats_collector_test.cpp     | 115 +++++
 src/paimon/core/stats/simple_stats_converter.cpp   | 299 +++++++++++++
 src/paimon/core/stats/simple_stats_converter.h     |  40 ++
 src/paimon/core/stats/simple_stats_evolution.cpp   | 318 ++++++++++++++
 src/paimon/core/stats/simple_stats_evolution.h     |  93 ++++
 .../core/stats/simple_stats_evolution_test.cpp     | 479 +++++++++++++++++++++
 src/paimon/core/stats/simple_stats_evolutions.h    |  55 +++
 src/paimon/core/stats/simple_stats_test.cpp        |  50 +++
 12 files changed, 1929 insertions(+)

diff --git a/src/paimon/core/stats/simple_stats.cpp b/src/paimon/core/stats/simple_stats.cpp
new file mode 100644
index 0000000..b8a5ee3
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats.cpp
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats.h"
+
+#include <utility>
+#include <vector>
+
+#include "arrow/api.h"
+#include "paimon/common/data/binary_row_writer.h"
+#include "paimon/common/data/binary_section.h"
+#include "paimon/common/data/internal_array.h"
+#include "paimon/common/data/internal_row.h"
+#include "paimon/common/utils/murmurhash_utils.h"
+#include "paimon/common/utils/serialization_utils.h"
+#include "paimon/macros.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class Bytes;
+
+/// Returns the shared stats instance that describes zero columns.
+const SimpleStats& SimpleStats::EmptyStats() {
+    // Built once on first call: empty min row, empty max row and an empty null-count array
+    // allocated from the default pool.
+    static const SimpleStats kEmptyStats = [] {
+        return SimpleStats(
+            BinaryRow::EmptyRow(), BinaryRow::EmptyRow(),
+            BinaryArray::FromLongArray(std::vector<int64_t>(), GetDefaultPool().get()));
+    }();
+    return kEmptyStats;
+}
+
+/// Packs these stats into a three-field row: field 0 holds the serialized min values, field 1
+/// the serialized max values and field 2 the null-count array.
+BinaryRow SimpleStats::ToRow() const {
+    const std::shared_ptr<MemoryPool> default_pool = GetDefaultPool();
+    MemoryPool* raw_pool = default_pool.get();
+
+    BinaryRow packed(3);
+    BinaryRowWriter row_writer(&packed, 32 * 1024, raw_pool);
+
+    // Min and max rows are stored as opaque binary blobs rather than as nested rows.
+    auto serialized_min = SerializationUtils::SerializeBinaryRow(min_values_, raw_pool);
+    row_writer.WriteBinary(0, *serialized_min);
+
+    auto serialized_max = SerializationUtils::SerializeBinaryRow(max_values_, raw_pool);
+    row_writer.WriteBinary(1, *serialized_max);
+
+    row_writer.WriteArray(2, null_counts_);
+    row_writer.Complete();
+    return packed;
+}
+
+/// Rebuilds stats from a row laid out as: field 0 = serialized min values, field 1 = serialized
+/// max values, field 2 = null-count array.
+///
+/// Returns Status::Invalid when `row` or `pool` is null, or when any of the three fields reads
+/// back as a null pointer. Failures from deserializing the min/max blobs are propagated.
+/// When both rows have no fields and the null-count array is empty, a copy of EmptyStats() is
+/// returned.
+Result<SimpleStats> SimpleStats::FromRow(const InternalRow* row, MemoryPool* pool) {
+    if (PAIMON_UNLIKELY(row == nullptr)) {
+        return Status::Invalid("internal row is null pointer");
+    }
+    if (PAIMON_UNLIKELY(pool == nullptr)) {
+        return Status::Invalid("memory pool is null pointer");
+    }
+
+    // Read the three raw fields first; each one is validated right after it is fetched.
+    std::shared_ptr<Bytes> min_blob = row->GetBinary(0);
+    if (PAIMON_UNLIKELY(min_blob == nullptr)) {
+        return Status::Invalid("get min value from internal row failed.");
+    }
+    std::shared_ptr<Bytes> max_blob = row->GetBinary(1);
+    if (PAIMON_UNLIKELY(max_blob == nullptr)) {
+        return Status::Invalid("get max value from internal row failed.");
+    }
+    std::shared_ptr<InternalArray> raw_null_counts = row->GetArray(2);
+    if (PAIMON_UNLIKELY(raw_null_counts == nullptr)) {
+        return Status::Invalid("get null counts from internal row failed.");
+    }
+
+    PAIMON_ASSIGN_OR_RAISE(BinaryRow decoded_min,
+                           SerializationUtils::DeserializeBinaryRow(min_blob));
+    PAIMON_ASSIGN_OR_RAISE(BinaryRow decoded_max,
+                           SerializationUtils::DeserializeBinaryRow(max_blob));
+
+    const bool describes_no_columns = decoded_min.GetFieldCount() == 0 &&
+                                      decoded_max.GetFieldCount() == 0 &&
+                                      raw_null_counts->Size() == 0;
+    if (describes_no_columns) {
+        return SimpleStats::EmptyStats();
+    }
+    return SimpleStats(decoded_min, decoded_max,
+                       BinaryArray::FromLongArray(raw_null_counts.get(), pool));
+}
+
+/// Combines the hash codes of the min values, max values and null counts into one value.
+int32_t SimpleStats::HashCode() const {
+    // Start from the min-values hash; it is passed as the last argument of the first
+    // HashUnsafeBytes call, and each call's result is passed on to the next one.
+    int32_t running = min_values_.HashCode();
+
+    int32_t max_part = max_values_.HashCode();
+    running = MurmurHashUtils::HashUnsafeBytes(reinterpret_cast<void*>(&max_part), 0,
+                                               sizeof(max_part), running);
+
+    int32_t null_part = null_counts_.HashCode();
+    return MurmurHashUtils::HashUnsafeBytes(reinterpret_cast<void*>(&null_part), 0,
+                                            sizeof(null_part), running);
+}
+
+/// Two stats are equal when their min values, max values and null counts all compare equal.
+bool SimpleStats::operator==(const SimpleStats& other) const {
+    // Same object: nothing to compare.
+    if (this == &other) {
+        return true;
+    }
+    if (!(min_values_ == other.min_values_)) {
+        return false;
+    }
+    if (!(max_values_ == other.max_values_)) {
+        return false;
+    }
+    return null_counts_ == other.null_counts_;
+}
+
+/// Returns the Arrow struct type of a stats row: non-nullable binary `_MIN_VALUES` and
+/// `_MAX_VALUES`, plus a nullable `_NULL_COUNTS` list of nullable int64 items.
+const std::shared_ptr<arrow::DataType>& SimpleStats::DataType() {
+    // Built once on first call and shared by every caller afterwards.
+    static std::shared_ptr<arrow::DataType> data_type = [] {
+        auto null_count_item = arrow::field("item", arrow::int64(), true);
+        arrow::FieldVector members;
+        members.push_back(arrow::field("_MIN_VALUES", arrow::binary(), /*nullable=*/false));
+        members.push_back(arrow::field("_MAX_VALUES", arrow::binary(), /*nullable=*/false));
+        members.push_back(
+            arrow::field("_NULL_COUNTS", arrow::list(null_count_item), /*nullable=*/true));
+        return arrow::struct_(members);
+    }();
+    return data_type;
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats.h b/src/paimon/core/stats/simple_stats.h
new file mode 100644
index 0000000..5bb67dc
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats.h
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "arrow/api.h"
+#include "fmt/format.h"
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/binary_row_writer.h"
+#include "paimon/result.h"
+namespace arrow {
+class ArrayBuilder;
+}  // namespace arrow
+
+namespace paimon {
+class InternalRow;
+class MemoryPool;
+
+/// Per-column statistics. The following stats are supported:
+///
+/// <ul>
+/// <li>min_values: the minimum values of the columns
+/// <li>max_values: the maximum values of the columns
+/// <li>null_counts: the number of nulls of the columns
+/// </ul>
+///
+/// All statistics are stored in binary form, which can significantly reduce memory
+/// consumption; the cost is that the column type needs to be known when reading a value back.
+class SimpleStats {
+ public:
+    /// Stores copies of the given min values, max values and null counts.
+    SimpleStats(const BinaryRow& min_values, const BinaryRow& max_values,
+                const BinaryArray& null_counts)
+        : min_values_(min_values), max_values_(max_values), null_counts_(null_counts) {}
+
+    /// Empty stats for 0 column number.
+    static const SimpleStats& EmptyStats();
+
+    /// Minimum values, one field per column.
+    const BinaryRow& MinValues() const { return min_values_; }
+
+    /// Maximum values, one field per column.
+    const BinaryRow& MaxValues() const { return max_values_; }
+
+    /// Null counts, one element per column.
+    const BinaryArray& NullCounts() const { return null_counts_; }
+
+    /// Converts these stats into a row representation.
+    BinaryRow ToRow() const;
+
+    /// Builds stats from a row representation; `pool` is used for allocations.
+    static Result<SimpleStats> FromRow(const InternalRow* row, MemoryPool* pool);
+
+    /// Short debug string: the class name followed by the hash code in hexadecimal.
+    std::string ToString() const {
+        return fmt::format("SimpleStats@{:#x}", static_cast<uint32_t>(HashCode()));
+    }
+
+    /// Hash code derived from all three members.
+    int32_t HashCode() const;
+
+    /// Member-wise equality.
+    bool operator==(const SimpleStats& other) const;
+
+    /// Arrow data type describing the row representation of stats.
+    static const std::shared_ptr<arrow::DataType>& DataType();
+
+ private:
+    BinaryRow min_values_;
+    BinaryRow max_values_;
+    BinaryArray null_counts_;
+};
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_collector.cpp b/src/paimon/core/stats/simple_stats_collector.cpp
new file mode 100644
index 0000000..7cd0440
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_collector.cpp
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats_collector.h"
+
+#include <cassert>
+#include <cstdint>
+#include <optional>
+#include <string>
+
+#include "arrow/api.h"
+#include "fmt/format.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/format/column_stats.h"
+
+namespace paimon {
+
+/// Keeps `schema` and allocates one stats slot per schema field; every slot starts out null.
+/// `schema_` is declared before `column_stats_`, so it is already set when the slots are sized.
+SimpleStatsCollector::SimpleStatsCollector(const std::shared_ptr<arrow::Schema>& schema)
+    : schema_(schema), column_stats_(schema_->num_fields()) {}
+
+namespace {
+
+/// Feeds field `pos` of `row` into the stats object held in `*slot`.
+///
+/// The slot is filled with the result of `create()` when it is still null. Returns false when
+/// the stored object is not a `TypedStats` (the caller maps that to an error status). Otherwise
+/// collects `read()` for a non-null field, or std::nullopt for a null field, and returns true.
+template <typename TypedStats, typename Create, typename Read>
+bool CollectTypedField(std::shared_ptr<ColumnStats>* slot, const BinaryRow& row, int32_t pos,
+                       Create&& create, Read&& read) {
+    if (*slot == nullptr) {
+        *slot = create();
+    }
+    auto* typed_stats = dynamic_cast<TypedStats*>(slot->get());
+    if (typed_stats == nullptr) {
+        assert(false);
+        return false;
+    }
+    if (!row.IsNullAt(pos)) {
+        typed_stats->Collect(read());
+    } else {
+        typed_stats->Collect(std::nullopt);
+    }
+    return true;
+}
+
+}  // namespace
+
+/// Folds one row into the per-column statistics.
+///
+/// Supported Arrow types: BOOL, INT8, INT16, INT32, INT64, FLOAT, DOUBLE, STRING, BINARY and
+/// DATE32.
+///
+/// Returns Status::Invalid when the row's field count differs from the schema's, or when the
+/// stats object already stored for a column is not of the class expected for its type.
+/// Returns Status::NotImplemented for any other Arrow type. Columns are processed in order, so
+/// columns before the failing one have already been updated when an error is returned.
+Status SimpleStatsCollector::Collect(const BinaryRow& row) {
+    assert(schema_);
+    if (schema_->num_fields() != row.GetFieldCount()) {
+        return Status::Invalid(fmt::format(
+            "fields count {} in partition schema not equal to fields count {} in partition",
+            schema_->num_fields(), row.GetFieldCount()));
+    }
+    for (int32_t i = 0; i < schema_->num_fields(); i++) {
+        // id() returns the enum by value, so hold it by value rather than by reference.
+        const auto type = schema_->field(i)->type()->id();
+        std::shared_ptr<ColumnStats>* slot = &column_stats_[i];
+        bool cast_ok = true;
+        switch (type) {
+            case arrow::Type::BOOL:
+                cast_ok = CollectTypedField<BooleanColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateBooleanColumnStats(std::nullopt, std::nullopt,
+                                                                     std::nullopt);
+                    },
+                    [&] { return row.GetBoolean(i); });
+                break;
+            case arrow::Type::INT8:
+                cast_ok = CollectTypedField<TinyIntColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateTinyIntColumnStats(std::nullopt, std::nullopt,
+                                                                     std::nullopt);
+                    },
+                    [&] { return row.GetByte(i); });
+                break;
+            case arrow::Type::INT16:
+                cast_ok = CollectTypedField<SmallIntColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateSmallIntColumnStats(std::nullopt, std::nullopt,
+                                                                      std::nullopt);
+                    },
+                    [&] { return row.GetShort(i); });
+                break;
+            case arrow::Type::INT32:
+                cast_ok = CollectTypedField<IntColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateIntColumnStats(std::nullopt, std::nullopt,
+                                                                 std::nullopt);
+                    },
+                    [&] { return row.GetInt(i); });
+                break;
+            case arrow::Type::INT64:
+                cast_ok = CollectTypedField<BigIntColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateBigIntColumnStats(std::nullopt, std::nullopt,
+                                                                    std::nullopt);
+                    },
+                    [&] { return row.GetLong(i); });
+                break;
+            case arrow::Type::FLOAT:
+                cast_ok = CollectTypedField<FloatColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateFloatColumnStats(std::nullopt, std::nullopt,
+                                                                   std::nullopt);
+                    },
+                    [&] { return row.GetFloat(i); });
+                break;
+            case arrow::Type::DOUBLE:
+                cast_ok = CollectTypedField<DoubleColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateDoubleColumnStats(std::nullopt, std::nullopt,
+                                                                    std::nullopt);
+                    },
+                    [&] { return row.GetDouble(i); });
+                break;
+            // STRING and BINARY share the string stats class and the same row accessor.
+            case arrow::Type::STRING:
+            case arrow::Type::BINARY:
+                cast_ok = CollectTypedField<StringColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateStringColumnStats(std::nullopt, std::nullopt,
+                                                                    std::nullopt);
+                    },
+                    [&] { return row.GetString(i).ToString(); });
+                break;
+            case arrow::Type::DATE32:
+                cast_ok = CollectTypedField<DateColumnStats>(
+                    slot, row, i,
+                    [] {
+                        return ColumnStats::CreateDateColumnStats(std::nullopt, std::nullopt,
+                                                                  std::nullopt);
+                    },
+                    [&] { return row.GetDate(i); });
+                break;
+            default:
+                return Status::NotImplemented(
+                    fmt::format("Do not support arrow type {}", static_cast<int32_t>(type)));
+        }
+        if (!cast_ok) {
+            return Status::Invalid("cast typed stats failed");
+        }
+    }
+    return Status::OK();
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_collector.h b/src/paimon/core/stats/simple_stats_collector.h
new file mode 100644
index 0000000..c8bd87b
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_collector.h
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arrow/api.h"
+#include "paimon/format/column_stats.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace arrow {
+class Schema;
+}  // namespace arrow
+
+namespace paimon {
+
+class BinaryRow;
+class ColumnStats;
+
+/// Accumulates per-column statistics over the rows passed to Collect().
+class SimpleStatsCollector {
+ public:
+    /// Creates a collector with one stats slot per field of `schema`.
+    explicit SimpleStatsCollector(const std::shared_ptr<arrow::Schema>& schema);
+
+    /// Folds the fields of `row` into the per-column stats.
+    Status Collect(const BinaryRow& row);
+
+    /// Returns a copy of the per-column stats, or Status::Invalid if any slot is still null.
+    Result<std::vector<std::shared_ptr<ColumnStats>>> GetResult() const {
+        // Validate every slot first so that a null entry never reaches the caller.
+        for (const auto& slot : column_stats_) {
+            if (slot == nullptr) {
+                return Status::Invalid("column stats is nullptr");
+            }
+        }
+        std::vector<std::shared_ptr<ColumnStats>> stats(column_stats_.begin(),
+                                                        column_stats_.end());
+        return stats;
+    }
+
+ private:
+    std::shared_ptr<arrow::Schema> schema_;
+    std::vector<std::shared_ptr<ColumnStats>> column_stats_;
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_collector_test.cpp b/src/paimon/core/stats/simple_stats_collector_test.cpp
new file mode 100644
index 0000000..bac30b4
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_collector_test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats_collector.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <variant>
+
+#include "arrow/api.h"
+#include "gtest/gtest.h"
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/data_define.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/core/stats/simple_stats_converter.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/testing/utils/binary_row_generator.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace paimon::test {
+
+// Collects two fully populated rows and checks the resulting min values, max values and
+// null counts for every supported column type.
+TEST(SimpleStatsCollectorTest, TestSimple) {
+    arrow::FieldVector fields = {
+        arrow::field("f0", arrow::boolean()),
+        arrow::field("f1", arrow::int8()),
+        arrow::field("f2", arrow::int16()),
+        arrow::field("f3", arrow::int32()),
+        arrow::field("f4", arrow::int64()),
+        arrow::field("f5", arrow::float32()),
+        arrow::field("f6", arrow::float64()),
+        arrow::field("f7", arrow::utf8()),
+        arrow::field("f8", arrow::date32()),
+    };
+    auto schema = arrow::schema(fields);
+    SimpleStatsCollector collector(schema);
+    auto pool = GetDefaultPool();
+
+    const BinaryRow first_row = BinaryRowGenerator::GenerateRow(
+        {true, static_cast<int8_t>(1), static_cast<int16_t>(1), static_cast<int32_t>(1),
+         static_cast<int64_t>(1), static_cast<float>(3.0), static_cast<double>(3.0),
+         std::string("abc"), 2025},
+        pool.get());
+    ASSERT_OK(collector.Collect(first_row));
+
+    const BinaryRow second_row = BinaryRowGenerator::GenerateRow(
+        {false, static_cast<int8_t>(2), static_cast<int16_t>(2), static_cast<int32_t>(2),
+         static_cast<int64_t>(2), static_cast<float>(6.0), static_cast<double>(6.0),
+         std::string("bcd"), 2026},
+        pool.get());
+    ASSERT_OK(collector.Collect(second_row));
+
+    ASSERT_OK_AND_ASSIGN(auto column_stats, collector.GetResult());
+    ASSERT_OK_AND_ASSIGN(SimpleStats stats,
+                         SimpleStatsConverter::ToBinary(column_stats, pool.get()));
+
+    // For the boolean column the expected min is `false` and the expected max is `true`,
+    // even though the rows were collected in the opposite order.
+    const std::vector<int64_t> no_nulls(9, 0);
+    auto expected_stats = BinaryRowGenerator::GenerateStats(
+        {false, static_cast<int8_t>(1), static_cast<int16_t>(1), static_cast<int32_t>(1),
+         static_cast<int64_t>(1), static_cast<float>(3.0), static_cast<double>(3.0),
+         std::string("abc"), 2025},
+        {true, static_cast<int8_t>(2), static_cast<int16_t>(2), static_cast<int32_t>(2),
+         static_cast<int64_t>(2), static_cast<float>(6.0), static_cast<double>(6.0),
+         std::string("bcd"), 2026},
+        no_nulls, GetDefaultPool().get());
+
+    ASSERT_EQ(stats, expected_stats);
+}
+
+// Collects a single row whose first nine fields are null and whose last field is 100, then
+// checks that min/max stay null for the null columns and that null counts are recorded.
+TEST(SimpleStatsCollectorTest, TestNull) {
+    arrow::FieldVector fields = {
+        arrow::field("f0", arrow::boolean()),
+        arrow::field("f1", arrow::int8()),
+        arrow::field("f2", arrow::int16()),
+        arrow::field("f3", arrow::int32()),
+        arrow::field("f4", arrow::int64()),
+        arrow::field("f5", arrow::float32()),
+        arrow::field("f6", arrow::float64()),
+        arrow::field("f7", arrow::utf8()),
+        arrow::field("f8", arrow::date32()),
+        arrow::field("key", arrow::int32()),
+    };
+    auto schema = arrow::schema(fields);
+    SimpleStatsCollector collector(schema);
+    auto pool = GetDefaultPool();
+
+    const BinaryRow mostly_null_row = BinaryRowGenerator::GenerateRow(
+        {NullType(), NullType(), NullType(), NullType(), NullType(), NullType(), NullType(),
+         NullType(), NullType(), 100},
+        pool.get());
+    ASSERT_OK(collector.Collect(mostly_null_row));
+
+    ASSERT_OK_AND_ASSIGN(auto column_stats, collector.GetResult());
+    ASSERT_EQ(10, column_stats.size());
+    ASSERT_OK_AND_ASSIGN(SimpleStats stats,
+                         SimpleStatsConverter::ToBinary(column_stats, pool.get()));
+
+    // Only one row was collected, so min and max must be identical.
+    ASSERT_EQ(stats.MinValues(), stats.MaxValues());
+    for (int32_t field = 0; field < 9; ++field) {
+        ASSERT_TRUE(stats.MinValues().IsNullAt(field));
+    }
+    ASSERT_EQ(stats.MinValues().GetInt(9), 100);
+
+    ASSERT_OK_AND_ASSIGN(std::vector<int64_t> actual_null_counts,
+                         stats.NullCounts().ToLongArray());
+    // One null in each of the first nine columns, none in the last.
+    std::vector<int64_t> wanted_null_counts(9, 1);
+    wanted_null_counts.push_back(0);
+    ASSERT_EQ(actual_null_counts, wanted_null_counts);
+}
+
+// A row whose field count differs from the schema's must be rejected with a message that
+// mentions the partition schema mismatch.
+TEST(SimpleStatsCollectorTest, TestInvalidPartition) {
+    arrow::FieldVector fields = {arrow::field("f0", arrow::boolean())};
+
+    auto schema = arrow::schema(fields);
+    SimpleStatsCollector collector(schema);
+    // The schema has one field while the empty row has none.
+    ASSERT_NOK_WITH_MSG(collector.Collect(BinaryRow::EmptyRow()), "partition schema not equal");
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/stats/simple_stats_converter.cpp b/src/paimon/core/stats/simple_stats_converter.cpp
new file mode 100644
index 0000000..d6a8075
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_converter.cpp
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats_converter.h"
+
+#include <cstdint>
+#include <optional>
+#include <utility>
+
+#include "fmt/format.h"
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/binary_array_writer.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/binary_row_writer.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/defs.h"
+#include "paimon/format/column_stats.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class MemoryPool;
+
+/// Packs per-column statistics into the binary `SimpleStats` layout.
+///
+/// Entry `i` of `stats_vec` is written at position `i` of the min row, the max row and the
+/// null-count array. A missing min, max or null count is written as null. Columns whose stats
+/// are `NestedColumnStats` always get a null min and max.
+///
+/// @param stats_vec per-column statistics, one entry per field.
+/// @param pool memory pool handed to the binary writers and to the string conversion.
+/// @return the packed stats, or `Status::Invalid` when an entry is null, when an entry's
+///         concrete class does not match its field type, when a decimal bound's scale differs
+///         from the scale declared by the stats, or when the field type is not handled here.
+Result<SimpleStats> SimpleStatsConverter::ToBinary(
+    const std::vector<std::shared_ptr<ColumnStats>>& stats_vec, MemoryPool* pool) {
+    const auto row_field_count = static_cast<int32_t>(stats_vec.size());
+    BinaryRow min_values(row_field_count);
+    BinaryRowWriter min_writer(&min_values, 1024, pool);
+    BinaryRow max_values(row_field_count);
+    BinaryRowWriter max_writer(&max_values, 1024, pool);
+    BinaryArray null_counts;
+    BinaryArrayWriter null_writer(&null_counts, row_field_count, sizeof(int64_t), pool);
+
+    // Writes one optional bound at `pos`: null when absent, otherwise through `write`.
+    const auto write_bound = [](BinaryRowWriter& writer, int32_t pos, const auto& bound,
+                                const auto& write) {
+        if (bound == std::nullopt) {
+            writer.SetNullAt(pos);
+        } else {
+            write(writer, pos, bound.value());
+        }
+    };
+    // Writes min then max of `typed_stats` at `pos` into the min row and the max row.
+    const auto write_min_max = [&](int32_t pos, auto& typed_stats, const auto& write) {
+        write_bound(min_writer, pos, typed_stats.Min(), write);
+        write_bound(max_writer, pos, typed_stats.Max(), write);
+    };
+
+    for (int32_t i = 0; i < row_field_count; i++) {
+        const auto& stats = stats_vec[i];
+        if (stats == nullptr) {
+            // Fail with a status instead of dereferencing a null entry.
+            return Status::Invalid(
+                fmt::format("column stats at index {} is null in SimpleStatsConverter", i));
+        }
+        auto type = stats->GetFieldType();
+        if (stats->NullCount()) {
+            null_writer.WriteLong(i, stats->NullCount().value());
+        } else {
+            null_writer.SetNullAt(i);
+        }
+        if (std::dynamic_pointer_cast<NestedColumnStats>(stats) != nullptr) {
+            // nested type, e.g., List, Map, Struct
+            min_writer.SetNullAt(i);
+            max_writer.SetNullAt(i);
+            continue;
+        }
+        switch (type) {
+            case FieldType::BOOLEAN: {
+                auto typed_stats = std::dynamic_pointer_cast<BooleanColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast BooleanColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteBoolean(pos, value);
+                              });
+                break;
+            }
+            case FieldType::TINYINT: {
+                auto typed_stats = std::dynamic_pointer_cast<TinyIntColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast TinyIntColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteByte(pos, value);
+                              });
+                break;
+            }
+            case FieldType::SMALLINT: {
+                auto typed_stats = std::dynamic_pointer_cast<SmallIntColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast SmallIntColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteShort(pos, value);
+                              });
+                break;
+            }
+            case FieldType::INT: {
+                auto typed_stats = std::dynamic_pointer_cast<IntColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast IntColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteInt(pos, value);
+                              });
+                break;
+            }
+            case FieldType::BIGINT: {
+                auto typed_stats = std::dynamic_pointer_cast<BigIntColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    // Name the class that was actually cast to, like every other branch.
+                    return Status::Invalid("cast BigIntColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteLong(pos, value);
+                              });
+                break;
+            }
+            case FieldType::FLOAT: {
+                auto typed_stats = std::dynamic_pointer_cast<FloatColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast FloatColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteFloat(pos, value);
+                              });
+                break;
+            }
+            case FieldType::DOUBLE: {
+                auto typed_stats = std::dynamic_pointer_cast<DoubleColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast DoubleColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteDouble(pos, value);
+                              });
+                break;
+            }
+            case FieldType::STRING: {
+                auto typed_stats = std::dynamic_pointer_cast<StringColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast StringColumnStats failed");
+                }
+                write_min_max(i, *typed_stats,
+                              [pool](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteString(pos, BinaryString::FromString(value, pool));
+                              });
+                break;
+            }
+            case FieldType::DATE: {
+                auto typed_stats = std::dynamic_pointer_cast<DateColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast DateColumnStats failed");
+                }
+                // Date bounds are written through the int writer.
+                write_min_max(i, *typed_stats,
+                              [](BinaryRowWriter& writer, int32_t pos, const auto& value) {
+                                  writer.WriteInt(pos, value);
+                              });
+                break;
+            }
+            case FieldType::TIMESTAMP: {
+                auto typed_stats = std::dynamic_pointer_cast<TimestampColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast TimestampColumnStats failed");
+                }
+                int32_t precision = typed_stats->GetPrecision();
+                // A missing bound of non-compact precision still goes through WriteTimestamp
+                // (with std::nullopt); only compact precisions use a plain SetNullAt.
+                if (typed_stats->Min() == std::nullopt) {
+                    if (!Timestamp::IsCompact(precision)) {
+                        min_writer.WriteTimestamp(i, std::nullopt, precision);
+                    } else {
+                        min_writer.SetNullAt(i);
+                    }
+                } else {
+                    min_writer.WriteTimestamp(i, typed_stats->Min().value(), precision);
+                }
+                if (typed_stats->Max() == std::nullopt) {
+                    if (!Timestamp::IsCompact(precision)) {
+                        max_writer.WriteTimestamp(i, std::nullopt, precision);
+                    } else {
+                        max_writer.SetNullAt(i);
+                    }
+                } else {
+                    max_writer.WriteTimestamp(i, typed_stats->Max().value(), precision);
+                }
+                break;
+            }
+            case FieldType::DECIMAL: {
+                auto typed_stats = std::dynamic_pointer_cast<DecimalColumnStats>(stats);
+                if (typed_stats == nullptr) {
+                    return Status::Invalid("cast DecimalColumnStats failed");
+                }
+                auto precision = typed_stats->GetPrecision();
+                auto scale = typed_stats->GetScale();
+                // Same null handling as timestamps: non-compact precisions write std::nullopt
+                // through WriteDecimal, compact ones use SetNullAt.
+                if (typed_stats->Min() == std::nullopt) {
+                    if (!Decimal::IsCompact(precision)) {
+                        min_writer.WriteDecimal(i, std::nullopt, precision);
+                    } else {
+                        min_writer.SetNullAt(i);
+                    }
+                } else {
+                    auto min_decimal = typed_stats->Min().value();
+                    if (min_decimal.Scale() != scale) {
+                        return Status::Invalid(
+                            fmt::format("in SimpleStatsConverter decimal scale mismatch: min "
+                                        "decimal scale {}, target type scale {}",
+                                        min_decimal.Scale(), scale));
+                    }
+                    min_writer.WriteDecimal(i, min_decimal, precision);
+                }
+                if (typed_stats->Max() == std::nullopt) {
+                    if (!Decimal::IsCompact(precision)) {
+                        max_writer.WriteDecimal(i, std::nullopt, precision);
+                    } else {
+                        max_writer.SetNullAt(i);
+                    }
+                } else {
+                    auto max_decimal = typed_stats->Max().value();
+                    if (max_decimal.Scale() != scale) {
+                        return Status::Invalid(
+                            fmt::format("in SimpleStatsConverter decimal scale mismatch: max "
+                                        "decimal scale {}, target type scale {}",
+                                        max_decimal.Scale(), scale));
+                    }
+                    max_writer.WriteDecimal(i, max_decimal, precision);
+                }
+                break;
+            }
+            default:
+                return Status::Invalid(fmt::format("invalid type {} for SimpleStatsConverter",
+                                                   static_cast<int32_t>(type)));
+        }
+    }
+    min_writer.Complete();
+    max_writer.Complete();
+    null_writer.Complete();
+    return SimpleStats(min_values, max_values, null_counts);
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_converter.h 
b/src/paimon/core/stats/simple_stats_converter.h
new file mode 100644
index 0000000..903cf6e
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_converter.h
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "paimon/format/column_stats.h"
+#include "paimon/result.h"
+
+namespace paimon {
+
+class SimpleStats;
+class ColumnStats;
+class MemoryPool;
+
+/// Stateless helper that turns per-column `ColumnStats` into one binary `SimpleStats`.
+class SimpleStatsConverter {
+ public:
+    /// Packs `stats` (one entry per field) into min values, max values and null counts.
+    ///
+    /// @param stats per-column statistics; entry `i` is written at position `i`.
+    /// @param pool memory pool used by the binary writers.
+    /// @return the packed `SimpleStats`, or an invalid status when an entry cannot be
+    ///         converted (for example an unsupported field type).
+    static Result<SimpleStats> ToBinary(const 
std::vector<std::shared_ptr<ColumnStats>>& stats,
+                                        MemoryPool* pool);
+};
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_evolution.cpp 
b/src/paimon/core/stats/simple_stats_evolution.cpp
new file mode 100644
index 0000000..a9356ed
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_evolution.cpp
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats_evolution.h"
+
+#include <cassert>
+#include <cstddef>
+#include <string_view>
+
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/binary_array_writer.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/binary_string.h"
+#include "paimon/common/data/generic_row.h"
+#include "paimon/common/data/internal_array.h"
+#include "paimon/common/utils/object_utils.h"
+#include "paimon/common/utils/projected_array.h"
+#include "paimon/common/utils/projected_row.h"
+#include "paimon/core/casting/casted_row.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/core/utils/field_mapping.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/status.h"
+
+namespace paimon {
+class Bytes;
+class CastExecutor;
+class InternalMap;
+class InternalRow;
+class MemoryPool;
+
+/// Prepares the lookup tables and the "no stats" placeholders for one data/table schema pair.
+///
+/// @param data_fields fields of the schema the stats were written with.
+/// @param table_fields fields of the schema the stats are evolved to.
+/// @param need_mapping whether `Evolution()` must additionally map stats to `table_fields`.
+/// @param pool memory pool used to build the all-null null-count placeholder.
+SimpleStatsEvolution::SimpleStatsEvolution(const std::vector<DataField>& data_fields,
+                                           const std::vector<DataField>& table_fields,
+                                           bool need_mapping,
+                                           const std::shared_ptr<MemoryPool>& pool)
+    : need_mapping_(need_mapping), data_fields_(data_fields), table_fields_(table_fields) {
+    // Placeholders returned by Evolution() when the dense field list is empty: a row with one
+    // slot per data field and a null-count array whose entries are all null.
+    empty_values_ = std::make_shared<GenericRow>(data_fields.size());
+    empty_null_counts_ = std::make_shared<BinaryArray>();
+    BinaryArrayWriter array_writer(empty_null_counts_.get(), data_fields.size(),
+                                   /*element_size=*/sizeof(int64_t), pool.get());
+    for (size_t i = 0; i < data_fields.size(); ++i) {
+        array_writer.SetNullAt(i);
+    }
+    array_writer.Complete();
+
+    // field id -> (position in data_fields, field)
+    for (size_t i = 0; i < data_fields.size(); i++) {
+        const auto& data_field = data_fields[i];
+        id_to_data_fields_.emplace(data_field.Id(),
+                                   std::make_pair(static_cast<int32_t>(i), data_field));
+    }
+    // field name -> table field
+    for (const auto& field : table_fields) {
+        name_to_table_fields_.emplace(field.Name(), field);
+    }
+}
+
+/// Null-count array re-indexed through a position mapping.
+///
+/// Position `pos` of this array reads position `mapping[pos]` of the wrapped array. A negative
+/// mapping entry means there is no source position: such an entry is reported as non-null and
+/// its count is `not_found_value`.
+class NullCountsEvoArray : public InternalArray {
+ public:
+    NullCountsEvoArray(const std::shared_ptr<InternalArray>& array,
+                       const std::vector<int32_t>& mapping, int64_t not_found_value)
+        : array_(array), mapping_(mapping), not_found_value_(not_found_value) {
+        assert(array_);
+    }
+
+    /// Number of positions exposed, i.e. the length of the mapping.
+    int32_t Size() const override {
+        return mapping_.size();
+    }
+
+    /// Unmapped positions are never null; mapped ones defer to the wrapped array.
+    bool IsNullAt(int32_t pos) const override {
+        assert(static_cast<size_t>(pos) < mapping_.size());
+        const int32_t source_pos = mapping_[pos];
+        return source_pos >= 0 && array_->IsNullAt(source_pos);
+    }
+
+    /// Unmapped positions yield `not_found_value`; mapped ones defer to the wrapped array.
+    int64_t GetLong(int32_t pos) const override {
+        const int32_t source_pos = mapping_[pos];
+        return source_pos < 0 ? not_found_value_ : array_->GetLong(source_pos);
+    }
+
+ private:
+    // ============================= Unsupported Methods ================================
+    bool GetBoolean(int32_t pos) const override;
+    char GetByte(int32_t pos) const override;
+    int16_t GetShort(int32_t pos) const override;
+    int32_t GetInt(int32_t pos) const override;
+    int32_t GetDate(int32_t pos) const override;
+    float GetFloat(int32_t pos) const override;
+    double GetDouble(int32_t pos) const override;
+    BinaryString GetString(int32_t pos) const override;
+    std::string_view GetStringView(int32_t pos) const override;
+    Decimal GetDecimal(int32_t pos, int32_t precision, int32_t scale) const override;
+    Timestamp GetTimestamp(int32_t pos, int32_t precision) const override;
+    std::shared_ptr<Bytes> GetBinary(int32_t pos) const override;
+    std::shared_ptr<InternalArray> GetArray(int32_t pos) const override;
+    std::shared_ptr<InternalMap> GetMap(int32_t pos) const override;
+    std::shared_ptr<InternalRow> GetRow(int32_t pos, int32_t num_fields) const override;
+    Result<std::vector<char>> ToBooleanArray() const override;
+    Result<std::vector<char>> ToByteArray() const override;
+    Result<std::vector<int16_t>> ToShortArray() const override;
+    Result<std::vector<int32_t>> ToIntArray() const override;
+    Result<std::vector<int64_t>> ToLongArray() const override;
+    Result<std::vector<float>> ToFloatArray() const override;
+    Result<std::vector<double>> ToDoubleArray() const override;
+
+ private:
+    // Wrapped array holding the original null counts.
+    std::shared_ptr<InternalArray> array_;
+    // mapping_[pos] is the position in array_, or negative when there is none.
+    std::vector<int32_t> mapping_;
+    // Count returned by GetLong() for positions with a negative mapping.
+    int64_t not_found_value_;
+};
+
+/// Evolves `stats` so it can be read against the current schemas.
+///
+/// Step 1 (only when `dense_fields` is set): an empty list swaps in the pre-built empty
+/// placeholders; a non-empty list wraps min/max/null counts in projections that map every
+/// data field to its index in `dense_fields` (-1 when the name is not listed). The mapping is
+/// cached per distinct `dense_fields` value.
+/// Step 2 (only when the instance was built with `need_mapping`): the result is passed through
+/// `MappingStats()` together with `row_count`.
+Result<SimpleStatsEvolution::EvolutionStats> SimpleStatsEvolution::Evolution(
+    const SimpleStats& stats, int64_t row_count,
+    const std::optional<std::vector<std::string>>& dense_fields) {
+    SimpleStatsEvolution::EvolutionStats result(
+        std::make_shared<BinaryRow>(stats.MinValues()),
+        std::make_shared<BinaryRow>(stats.MaxValues()),
+        std::make_shared<BinaryArray>(stats.NullCounts()));
+
+    if (dense_fields.has_value()) {
+        const std::vector<std::string>& dense_names = dense_fields.value();
+        if (dense_names.empty()) {
+            // optimize for empty dense fields
+            result.min_values = empty_values_;
+            result.max_values = empty_values_;
+            result.null_counts = empty_null_counts_;
+        } else {
+            // create dense index mapping
+            std::vector<int32_t> dense_idx_of_data_field;
+            std::optional<std::vector<int32_t>> cached = dense_fields_mapping_.Find(dense_names);
+            if (cached) {
+                dense_idx_of_data_field = std::move(cached).value();
+            } else {
+                std::map<std::string, int32_t> dense_idx_by_name =
+                    ObjectUtils::CreateIdentifierToIndexMap(dense_names);
+                dense_idx_of_data_field.assign(data_fields_.size(), -1);
+                for (size_t field_idx = 0; field_idx < data_fields_.size(); ++field_idx) {
+                    auto hit = dense_idx_by_name.find(data_fields_[field_idx].Name());
+                    if (hit == dense_idx_by_name.end()) {
+                        continue;
+                    }
+                    dense_idx_of_data_field[field_idx] = hit->second;
+                }
+                dense_fields_mapping_.Insert(dense_names, dense_idx_of_data_field);
+            }
+
+            result.min_values =
+                std::make_shared<ProjectedRow>(result.min_values, dense_idx_of_data_field);
+            result.max_values =
+                std::make_shared<ProjectedRow>(result.max_values, dense_idx_of_data_field);
+            result.null_counts =
+                std::make_shared<ProjectedArray>(result.null_counts, dense_idx_of_data_field);
+        }
+    }
+
+    if (need_mapping_) {
+        return MappingStats(row_count, result);
+    }
+    return result;
+}
+
+/// Re-aligns `src_stats` from the data schema to the table schema.
+///
+/// Table fields are matched to data fields by field id. Min/max rows are projected through
+/// that mapping; null counts are wrapped so that a table field without a matching data field
+/// reports `row_count`. If any field needs a cast executor, min/max are additionally wrapped
+/// in casted rows.
+Result<SimpleStatsEvolution::EvolutionStats> SimpleStatsEvolution::MappingStats(
+    int64_t row_count, const SimpleStatsEvolution::EvolutionStats& src_stats) const {
+    SimpleStatsEvolution::EvolutionStats mapped = src_stats;
+    auto& min_row = mapped.min_values;
+    auto& max_row = mapped.max_values;
+    auto& null_count_array = mapped.null_counts;
+
+    std::map<int32_t, int32_t> data_idx_by_id = ObjectUtils::CreateIdentifierToIndexMap(
+        data_fields_, [](const DataField& field) -> int32_t { return field.Id(); });
+
+    // For every table field: index of the matching data field (or -1), and the field whose
+    // type the stored value has (the data field when matched, else the table field itself).
+    std::vector<int32_t> data_idx_of_table_field;
+    data_idx_of_table_field.reserve(table_fields_.size());
+    std::vector<DataField> source_fields;
+    source_fields.reserve(table_fields_.size());
+    for (const DataField& table_field : table_fields_) {
+        auto hit = data_idx_by_id.find(table_field.Id());
+        if (hit == data_idx_by_id.end()) {
+            data_idx_of_table_field.push_back(-1);
+            source_fields.push_back(table_field);
+        } else {
+            data_idx_of_table_field.push_back(hit->second);
+            source_fields.push_back(data_fields_[hit->second]);
+        }
+    }
+    min_row = std::make_shared<ProjectedRow>(min_row, data_idx_of_table_field);
+    max_row = std::make_shared<ProjectedRow>(max_row, data_idx_of_table_field);
+    null_count_array = std::make_shared<NullCountsEvoArray>(null_count_array,
+                                                            data_idx_of_table_field, row_count);
+
+    // cast mapping
+    PAIMON_ASSIGN_OR_RAISE(
+        std::vector<std::shared_ptr<CastExecutor>> cast_executors,
+        FieldMappingBuilder::CreateDataCastExecutors(table_fields_, source_fields));
+
+    // Wrap in casted rows only if at least one field actually has an executor.
+    bool any_cast = false;
+    for (size_t k = 0; k < cast_executors.size() && !any_cast; ++k) {
+        any_cast = (cast_executors[k] != nullptr);
+    }
+    if (!any_cast) {
+        return mapped;
+    }
+    PAIMON_ASSIGN_OR_RAISE(
+        min_row, CastedRow::Create(cast_executors, source_fields, table_fields_, min_row));
+    PAIMON_ASSIGN_OR_RAISE(
+        max_row, CastedRow::Create(cast_executors, source_fields, table_fields_, max_row));
+    return mapped;
+}
+
+// ============================= Unsupported Methods 
================================
+// The accessors below exist to satisfy the InternalArray interface; NullCountsEvoArray only
+// gives IsNullAt() and GetLong() a real implementation. Each of them trips assert(false) when
+// assertions are enabled. With assertions compiled out (NDEBUG) the call falls through to the
+// wrapped array at mapping_[pos], without the negative-mapping check GetLong() performs.
+bool NullCountsEvoArray::GetBoolean(int32_t pos) const {
+    assert(false);
+    return array_->GetBoolean(mapping_[pos]);
+}
+
+char NullCountsEvoArray::GetByte(int32_t pos) const {
+    assert(false);
+    return array_->GetByte(mapping_[pos]);
+}
+
+int16_t NullCountsEvoArray::GetShort(int32_t pos) const {
+    assert(false);
+    return array_->GetShort(mapping_[pos]);
+}
+
+int32_t NullCountsEvoArray::GetInt(int32_t pos) const {
+    assert(false);
+    return array_->GetInt(mapping_[pos]);
+}
+
+// Delegates to GetInt(), so it hits that function's assert(false) as well.
+int32_t NullCountsEvoArray::GetDate(int32_t pos) const {
+    return NullCountsEvoArray::GetInt(pos);
+}
+
+float NullCountsEvoArray::GetFloat(int32_t pos) const {
+    assert(false);
+    return array_->GetFloat(mapping_[pos]);
+}
+
+double NullCountsEvoArray::GetDouble(int32_t pos) const {
+    assert(false);
+    return array_->GetDouble(mapping_[pos]);
+}
+
+BinaryString NullCountsEvoArray::GetString(int32_t pos) const {
+    assert(false);
+    return array_->GetString(mapping_[pos]);
+}
+
+std::string_view NullCountsEvoArray::GetStringView(int32_t pos) const {
+    assert(false);
+    return array_->GetStringView(mapping_[pos]);
+}
+
+Decimal NullCountsEvoArray::GetDecimal(int32_t pos, int32_t precision, int32_t 
scale) const {
+    assert(false);
+    return array_->GetDecimal(mapping_[pos], precision, scale);
+}
+
+Timestamp NullCountsEvoArray::GetTimestamp(int32_t pos, int32_t precision) 
const {
+    assert(false);
+    return array_->GetTimestamp(mapping_[pos], precision);
+}
+
+std::shared_ptr<Bytes> NullCountsEvoArray::GetBinary(int32_t pos) const {
+    assert(false);
+    return array_->GetBinary(mapping_[pos]);
+}
+
+std::shared_ptr<InternalArray> NullCountsEvoArray::GetArray(int32_t pos) const 
{
+    assert(false);
+    return array_->GetArray(mapping_[pos]);
+}
+
+std::shared_ptr<InternalMap> NullCountsEvoArray::GetMap(int32_t pos) const {
+    assert(false);
+    return array_->GetMap(mapping_[pos]);
+}
+
+std::shared_ptr<InternalRow> NullCountsEvoArray::GetRow(int32_t pos, int32_t 
num_fields) const {
+    assert(false);
+    return array_->GetRow(mapping_[pos], num_fields);
+}
+
+// Bulk conversions are not supported by NullCountsEvoArray: every To*Array() below returns
+// Status::Invalid without touching the wrapped array. This includes ToLongArray(), so callers
+// have to read counts element by element through IsNullAt()/GetLong().
+Result<std::vector<char>> NullCountsEvoArray::ToBooleanArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to 
boolean array");
+}
+Result<std::vector<char>> NullCountsEvoArray::ToByteArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to byte 
array");
+}
+Result<std::vector<int16_t>> NullCountsEvoArray::ToShortArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to short 
array");
+}
+Result<std::vector<int32_t>> NullCountsEvoArray::ToIntArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to int 
array");
+}
+Result<std::vector<int64_t>> NullCountsEvoArray::ToLongArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to long 
array");
+}
+Result<std::vector<float>> NullCountsEvoArray::ToFloatArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to float 
array");
+}
+Result<std::vector<double>> NullCountsEvoArray::ToDoubleArray() const {
+    return Status::Invalid("NullCountsEvoArray do not support convert to 
double array");
+}
+
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_evolution.h 
b/src/paimon/core/stats/simple_stats_evolution.h
new file mode 100644
index 0000000..556e7df
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_evolution.h
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/generic_row.h"
+#include "paimon/common/types/data_field.h"
+#include "paimon/common/utils/concurrent_hash_map.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/result.h"
+
+namespace paimon {
+class BinaryArray;
+class GenericRow;
+class InternalArray;
+class InternalRow;
+class MemoryPool;
+class SimpleStats;
+
+/// Adapts `SimpleStats` written with one schema (the data fields) so they can be read
+/// against another schema (the table fields).
+///
+/// An instance is bound to one pair of data/table field lists. `Evolution()` first handles
+/// the optional dense field list and then, when the instance was created with
+/// `need_mapping`, maps the stats onto the table fields.
+class SimpleStatsEvolution {
+ public:
+    /// @param data_fields fields of the schema the stats were written with.
+    /// @param table_fields fields of the schema the stats are evolved to.
+    /// @param need_mapping whether `Evolution()` also maps stats onto `table_fields`.
+    /// @param pool memory pool used while building internal placeholder arrays.
+    SimpleStatsEvolution(const std::vector<DataField>& data_fields,
+                         const std::vector<DataField>& table_fields, bool 
need_mapping,
+                         const std::shared_ptr<MemoryPool>& pool);
+
+    /// Result of an evolution: min values, max values and null counts, exposed through the
+    /// generic row/array interfaces.
+    struct EvolutionStats {
+        EvolutionStats() = default;
+
+        EvolutionStats(const std::shared_ptr<InternalRow>& _min_values,
+                       const std::shared_ptr<InternalRow>& _max_values,
+                       const std::shared_ptr<InternalArray>& _null_counts)
+            : min_values(_min_values), max_values(_max_values), 
null_counts(_null_counts) {}
+        std::shared_ptr<InternalRow> min_values;
+        std::shared_ptr<InternalRow> max_values;
+        std::shared_ptr<InternalArray> null_counts;
+    };
+
+    /// Evolves `stats`.
+    ///
+    /// @param stats the stored statistics.
+    /// @param row_count passed on to the table-field mapping step, where it becomes the null
+    ///        count of table fields that have no matching data field.
+    /// @param dense_fields optional list of field names; when set, stats positions are
+    ///        resolved by looking up each data field's name in this list. An empty list
+    ///        yields the pre-built empty placeholders.
+    Result<EvolutionStats> Evolution(const SimpleStats& stats, int64_t 
row_count,
+                                     const 
std::optional<std::vector<std::string>>& dense_fields);
+
+    /// Field id -> (position in the data fields, data field).
+    const std::map<int32_t, std::pair<int32_t, DataField>>& 
GetFieldIdToDataField() const {
+        return id_to_data_fields_;
+    }
+
+    /// Field name -> table field.
+    const std::map<std::string, DataField>& GetFieldNameToTableField() const {
+        return name_to_table_fields_;
+    }
+
+ private:
+    /// Maps `src_stats` from data-field order to table-field order (matching by field id)
+    /// and applies casts where the field mapping requires them.
+    Result<SimpleStatsEvolution::EvolutionStats> MappingStats(
+        int64_t row_count, const SimpleStatsEvolution::EvolutionStats& 
src_stats) const;
+
+ private:
+    // Whether Evolution() runs the table-field mapping step.
+    bool need_mapping_ = false;
+    std::vector<DataField> data_fields_;
+    std::vector<DataField> table_fields_;
+    // Placeholders handed out when the dense field list is empty.
+    std::shared_ptr<GenericRow> empty_values_;
+    std::shared_ptr<BinaryArray> empty_null_counts_;
+
+    // field id -> (position in data_fields_, data field)
+    std::map<int32_t, std::pair<int32_t, DataField>> id_to_data_fields_;
+    // field name -> table field
+    std::map<std::string, DataField> name_to_table_fields_;
+
+    // dense field names -> idx mapping
+    ConcurrentHashMap<std::vector<std::string>, std::vector<int32_t>, 
VectorStringHashCompare>
+        dense_fields_mapping_;
+};
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_evolution_test.cpp 
b/src/paimon/core/stats/simple_stats_evolution_test.cpp
new file mode 100644
index 0000000..0281341
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_evolution_test.cpp
@@ -0,0 +1,479 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats_evolution.h"
+
+#include <cstddef>
+#include <functional>
+#include <string>
+#include <variant>
+
+#include "gtest/gtest.h"
+#include "paimon/common/data/binary_array.h"
+#include "paimon/common/data/binary_row.h"
+#include "paimon/common/data/data_define.h"
+#include "paimon/common/data/internal_array.h"
+#include "paimon/common/utils/internal_row_utils.h"
+#include "paimon/core/schema/schema_manager.h"
+#include "paimon/core/schema/table_schema.h"
+#include "paimon/core/stats/simple_stats.h"
+#include "paimon/data/decimal.h"
+#include "paimon/data/timestamp.h"
+#include "paimon/fs/local/local_file_system.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/status.h"
+#include "paimon/testing/utils/binary_row_generator.h"
+#include "paimon/testing/utils/testharness.h"
+
+namespace arrow {
+class Schema;
+}  // namespace arrow
+namespace paimon {
+class FileSystem;
+class InternalRow;
+}  // namespace paimon
+
+namespace paimon::test {
+class SimpleStatsEvolutionTest : public ::testing::Test {  // Fixture: two inline schemas plus stats builders and comparison helpers.
+ public:
+    void SetUp() override {  // Parses the two JSON literals below into old_schema_ and new_schema_.
+        std::string old_schema_str = R"==({
+  "version" : 3,
+  "id" : 0,
+  "fields" : [ {
+    "id" : 0,
+    "name" : "key0",
+    "type" : "INT"
+  }, {
+    "id" : 1,
+    "name" : "key1",
+    "type" : "INT"
+  }, {
+    "id" : 2,
+    "name" : "f0",
+    "type" : "STRING"
+  }, {
+    "id" : 3,
+    "name" : "f1",
+    "type" : "TIMESTAMP(9)"
+  }, {
+    "id" : 4,
+    "name" : "f2",
+    "type" : "DECIMAL(5, 2)"
+  }, {
+    "id" : 5,
+    "name" : "f3",
+    "type" : "STRING"
+  }, {
+    "id" : 6,
+    "name" : "f4",
+    "type" : "DATE"
+  }, {
+    "id" : 7,
+    "name" : "f5",
+    "type" : "BIGINT"
+  } ],
+  "highestFieldId" : 7,
+  "partitionKeys" : [ "key0", "key1" ],
+  "primaryKeys" : [ ],
+  "options" : {
+    "manifest.format" : "orc",
+    "file.format" : "orc"
+  },
+  "timeMillis" : 1732604147800
+})==";  // Schema id 0: key0, key1, f0..f5 with field ids 0..7.
+
+        std::string new_schema_str = R"==({
+  "version" : 3,
+  "id" : 1,
+  "fields" : [ {
+    "id" : 6,
+    "name" : "f4",
+    "type" : "DATE"
+  }, {
+    "id" : 0,
+    "name" : "key0",
+    "type" : "INT"
+  }, {
+    "id" : 1,
+    "name" : "key1",
+    "type" : "INT"
+  }, {
+    "id" : 2,
+    "name" : "f3",
+    "type" : "INT"
+  }, {
+    "id" : 3,
+    "name" : "f1",
+    "type" : "TIMESTAMP(9)"
+  }, {
+    "id" : 4,
+    "name" : "f2",
+    "type" : "DECIMAL(6, 3)"
+  }, {
+    "id" : 5,
+    "name" : "f0",
+    "type" : "BOOLEAN"
+  }, {
+    "id" : 8,
+    "name" : "f6",
+    "type" : "INT"
+  } ],
+  "highestFieldId" : 8,
+  "partitionKeys" : [ "key0", "key1" ],
+  "primaryKeys" : [ ],
+  "options" : {
+    "manifest.format" : "orc",
+    "file.format" : "orc"
+  },
+  "timeMillis" : 1732605243483
+})==";  // Schema id 1: fields reordered, ids 2/4/5 renamed or retyped, f6 (id 8) added, id 7 absent.
+
+        ASSERT_OK_AND_ASSIGN(old_schema_, 
TableSchema::CreateFromJson(old_schema_str));
+        ASSERT_OK_AND_ASSIGN(new_schema_, 
TableSchema::CreateFromJson(new_schema_str));
+    }
+
+    SimpleStats CreateStats() const {  // Eight-field stats whose value order follows the old schema's field list.
+        auto min = BinaryRowGenerator::GenerateRow(
+            {0, 1, std::string("100"), TimestampType(Timestamp(0, 0), 9), 
Decimal(5, 2, 12345),
+             std::string("nO"), 10, NullType()},
+            pool_.get());
+        auto max = BinaryRowGenerator::GenerateRow(
+            {20, 21, std::string("110"), TimestampType(Timestamp(0, 1), 9), 
Decimal(5, 2, 32345),
+             std::string("Yes"), 20, NullType()},
+            pool_.get());
+        auto null_count = BinaryArray::FromLongArray({0, 0, 1, 0, 0, 0, 1, 4}, 
pool_.get());
+        return SimpleStats(min, max, null_count);
+    }
+
+    SimpleStats CreateNewStats() const {  // Expected counterpart of CreateStats(), ordered like the new schema's field list.
+        auto min =
+            BinaryRowGenerator::GenerateRow({10, 0, 1, 100, 
TimestampType(Timestamp(0, 0), 9),
+                                             Decimal(6, 3, 123450), false, 
NullType()},
+                                            pool_.get());
+        auto max =
+            BinaryRowGenerator::GenerateRow({20, 20, 21, 110, 
TimestampType(Timestamp(0, 1), 9),
+                                             Decimal(6, 3, 323450), true, 
NullType()},
+                                            pool_.get());
+        auto null_count = BinaryArray::FromLongArray({1, 0, 0, 1, 0, 0, 0, 4}, 
pool_.get());
+        return SimpleStats(min, max, null_count);
+    }
+
+    void CheckResultRow(const InternalRow& result, const InternalRow& expected,  // Asserts both rows hold equal values for every field in `fields`.
+                        const std::vector<DataField>& fields) const {
+        auto collect_variant =  // Reads each field of a row through its getter into a vector of variants.
+            [](const InternalRow& row,
+               const std::shared_ptr<arrow::Schema>& schema) -> 
std::vector<VariantType> {
+            EXPECT_OK_AND_ASSIGN(auto getters,
+                                 InternalRowUtils::CreateFieldGetters(schema, 
/*use_view=*/true));
+            std::vector<VariantType> ret;
+            for (const auto& getter : getters) {
+                ret.push_back(getter(row));
+            }
+            return ret;
+        };
+        auto schema = DataField::ConvertDataFieldsToArrowSchema(fields);  // Both rows are read with the same arrow schema.
+        auto result_variants = collect_variant(result, schema);
+        auto expected_variants = collect_variant(expected, schema);
+        ASSERT_EQ(result_variants, expected_variants);
+    }
+
+    void CheckResultArray(const InternalArray& result, const 
std::vector<VariantType>& expected) {  // Asserts size, null-ness and int64 value of every entry.
+        ASSERT_EQ(result.Size(), expected.size());
+        for (size_t i = 0; i < expected.size(); ++i) {
+            if (DataDefine::IsVariantNull(expected[i])) {  // A null expectation requires a null entry.
+                ASSERT_TRUE(result.IsNullAt(i));
+            } else {
+                ASSERT_FALSE(result.IsNullAt(i));
+                ASSERT_EQ(result.GetLong(i), 
DataDefine::GetVariantValue<int64_t>(expected[i]));
+            }
+        }
+    }
+
+ private:  // NOTE(review): TEST_F bodies use these members directly; presumably the test build relaxes access control — confirm.
+    std::shared_ptr<MemoryPool> pool_ = GetDefaultPool();
+    std::shared_ptr<FileSystem> fs_ = std::make_shared<LocalFileSystem>();
+    std::shared_ptr<TableSchema> old_schema_;  // Assigned in SetUp().
+    std::shared_ptr<TableSchema> new_schema_;  // Assigned in SetUp().
+};
+
+TEST_F(SimpleStatsEvolutionTest, TestNoChangeOfFields) {  // Same fields on both sides, no mapping, no dense fields: stats must come back unchanged.
+    std::string table_root =
+        paimon::test::GetDataDir() +
+        
"orc/append_table_alter_table_with_cast.db/append_table_alter_table_with_cast";
+    SchemaManager manager(fs_, table_root);
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema, 
manager.ReadSchema(/*schema_id=*/0));
+    ASSERT_TRUE(schema);
+
+    SimpleStatsEvolution evo(schema->Fields(), schema->Fields(), 
/*need_mapping=*/false, pool_);
+    SimpleStats stats = CreateStats();
+    ASSERT_OK_AND_ASSIGN(SimpleStatsEvolution::EvolutionStats new_stats,
+                         evo.Evolution(stats, /*row_count=*/4, 
/*dense_fields=*/std::nullopt));
+    CheckResultRow(*(new_stats.min_values), stats.min_values_, 
schema->Fields());
+    CheckResultRow(*(new_stats.max_values), stats.max_values_, 
schema->Fields());
+    CheckResultArray(*(new_stats.null_counts), {0l, 0l, 1l, 0l, 0l, 0l, 1l, 
4l});  // Same counts as the input built by CreateStats().
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestNoSchemaChangeWithEmptyDenseFields) {  // Empty dense list with empty stats: every min, max and null count is expected to be null.
+    std::string table_root =
+        paimon::test::GetDataDir() +
+        
"orc/append_table_alter_table_with_cast.db/append_table_alter_table_with_cast";
+    SchemaManager manager(fs_, table_root);
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema, 
manager.ReadSchema(/*schema_id=*/0));
+    ASSERT_TRUE(schema);
+
+    SimpleStatsEvolution evo(schema->Fields(), schema->Fields(), 
/*need_mapping=*/false, pool_);
+
+    auto null_count = BinaryArray::FromLongArray(std::vector<int64_t>(), 
pool_.get());
+    SimpleStats stats(/*min_values=*/BinaryRow::EmptyRow(), 
/*max_values=*/BinaryRow::EmptyRow(),
+                      null_count);  // Input carries no per-field values at all.
+
+    ASSERT_OK_AND_ASSIGN(SimpleStatsEvolution::EvolutionStats new_stats,
+                         evo.Evolution(stats, /*row_count=*/4,
+                                       
/*dense_fields=*/std::vector<std::string>({})));
+
+    auto expected_min =
+        BinaryRowGenerator::GenerateRow({NullType(), NullType(), NullType(), 
NullType(), NullType(),
+                                         NullType(), NullType(), NullType()},
+                                        pool_.get());
+    auto expected_max =
+        BinaryRowGenerator::GenerateRow({NullType(), NullType(), NullType(), 
NullType(), NullType(),
+                                         NullType(), NullType(), NullType()},
+                                        pool_.get());
+
+    CheckResultRow(*(new_stats.min_values), expected_min, schema->Fields());
+    CheckResultRow(*(new_stats.max_values), expected_max, schema->Fields());
+    CheckResultArray(*(new_stats.null_counts), {NullType(), NullType(), 
NullType(), NullType(),
+                                                NullType(), NullType(), 
NullType(), NullType()});
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestNoSchemaChangeWithDenseFields) {  // Stats hold only key0/key1/f1/f5; every other field is expected to come back null.
+    std::string table_root =
+        paimon::test::GetDataDir() +
+        
"orc/append_table_alter_table_with_cast.db/append_table_alter_table_with_cast";
+    SchemaManager manager(fs_, table_root);
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema, 
manager.ReadSchema(/*schema_id=*/0));
+    ASSERT_TRUE(schema);
+
+    SimpleStatsEvolution evo(schema->Fields(), schema->Fields(), 
/*need_mapping=*/false, pool_);
+
+    auto min = BinaryRowGenerator::GenerateRow(
+        {0, 1, TimestampType(Timestamp(0, 1), 9), NullType()}, pool_.get());
+    auto max = BinaryRowGenerator::GenerateRow(
+        {20, 21, TimestampType(Timestamp(20, 21), 9), NullType()}, 
pool_.get());
+    auto null_count = BinaryArray::FromLongArray({0, 0, 1, 4}, pool_.get());  // One entry per dense field.
+    SimpleStats stats(min, max, null_count);
+
+    ASSERT_OK_AND_ASSIGN(
+        SimpleStatsEvolution::EvolutionStats new_stats,
+        evo.Evolution(stats, /*row_count=*/4,
+                      /*dense_fields=*/std::vector<std::string>({"key0", 
"key1", "f1", "f5"})));
+
+    auto expected_min =
+        BinaryRowGenerator::GenerateRow({0, 1, NullType(), 
TimestampType(Timestamp(0, 1), 9),
+                                         NullType(), NullType(), NullType(), 
NullType()},
+                                        pool_.get());
+    auto expected_max =
+        BinaryRowGenerator::GenerateRow({20, 21, NullType(), 
TimestampType(Timestamp(20, 21), 9),
+                                         NullType(), NullType(), NullType(), 
NullType()},
+                                        pool_.get());
+
+    CheckResultRow(*(new_stats.min_values), expected_min, schema->Fields());
+    CheckResultRow(*(new_stats.max_values), expected_max, schema->Fields());
+    CheckResultArray(*(new_stats.null_counts),
+                     {0l, 0l, NullType(), 1l, NullType(), NullType(), 
NullType(), 4l});
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestSchemaChangeWithNoDenseFields) {  // Old-schema stats evolved onto new_schema_ must equal CreateNewStats().
+    SimpleStatsEvolution evo(old_schema_->Fields(), new_schema_->Fields(), 
/*need_mapping=*/true,
+                             pool_);
+
+    SimpleStats stats = CreateStats();
+    SimpleStats expected_stats = CreateNewStats();
+
+    ASSERT_OK_AND_ASSIGN(SimpleStatsEvolution::EvolutionStats result_stats,
+                         evo.Evolution(stats, /*row_count=*/4,
+                                       /*dense_fields=*/std::nullopt));
+
+    CheckResultRow(*(result_stats.min_values), expected_stats.min_values_, 
new_schema_->Fields());
+    CheckResultRow(*(result_stats.max_values), expected_stats.max_values_, 
new_schema_->Fields());
+    CheckResultArray(*(result_stats.null_counts), {1l, 0l, 0l, 1l, 0l, 0l, 0l, 
4l});  // Last entry belongs to f6, which the old schema lacks; its expected count equals row_count.
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestSchemaChangeWithDenseFields) {  // Dense stats for old-schema f3/f4/f5, evolved onto new_schema_.
+    SimpleStatsEvolution evo(old_schema_->Fields(), new_schema_->Fields(), 
/*need_mapping=*/true,
+                             pool_);
+
+    auto min = BinaryRowGenerator::GenerateRow({std::string("no"), 10, 1234l}, 
pool_.get());
+    auto max = BinaryRowGenerator::GenerateRow({std::string("yes"), 20, 
2234l}, pool_.get());
+    auto null_count = BinaryArray::FromLongArray({0, 1, 1}, pool_.get());  // One entry per dense field.
+    SimpleStats old_stats(min, max, null_count);
+
+    ASSERT_OK_AND_ASSIGN(
+        SimpleStatsEvolution::EvolutionStats result_stats,
+        evo.Evolution(old_stats, /*row_count=*/4,
+                      /*dense_fields=*/std::vector<std::string>({"f3", "f4", 
"f5"})));
+
+    auto expected_min = BinaryRowGenerator::GenerateRow(
+        {10, NullType(), NullType(), NullType(), NullType(), NullType(), 
false, NullType()},
+        pool_.get());
+    auto expected_max = BinaryRowGenerator::GenerateRow(
+        {20, NullType(), NullType(), NullType(), NullType(), NullType(), true, 
NullType()},
+        pool_.get());
+
+    CheckResultRow(*(result_stats.min_values), expected_min, 
new_schema_->Fields());
+    CheckResultRow(*(result_stats.max_values), expected_max, 
new_schema_->Fields());
+    CheckResultArray(*(result_stats.null_counts),
+                     {1l, NullType(), NullType(), NullType(), NullType(), 
NullType(), 0l, 4l});  // Slot 0 is f4 (id 6), slot 6 is id 5 (BOOLEAN in the new schema), slot 7 is the added f6.
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestNoSchemaChangeWithDenseFieldsWithMap) {  // Checks dense_fields_mapping_ size: 1 after a repeated list, 2 after a different list.
+    std::string table_root =
+        paimon::test::GetDataDir() +
+        
"orc/append_table_alter_table_with_cast.db/append_table_alter_table_with_cast";
+    SchemaManager manager(fs_, table_root);
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema, 
manager.ReadSchema(/*schema_id=*/0));
+    ASSERT_TRUE(schema);
+
+    SimpleStatsEvolution evo(schema->Fields(), schema->Fields(), 
/*need_mapping=*/false, pool_);
+    {
+        // first turn
+        auto min = BinaryRowGenerator::GenerateRow(
+            {0, 1, TimestampType(Timestamp(0, 1), 9), NullType()}, 
pool_.get());
+        auto max = BinaryRowGenerator::GenerateRow(
+            {20, 21, TimestampType(Timestamp(20, 21), 9), NullType()}, 
pool_.get());
+        auto null_count = BinaryArray::FromLongArray({0, 0, 1, 4}, 
pool_.get());
+        SimpleStats stats(min, max, null_count);
+
+        ASSERT_OK_AND_ASSIGN(
+            SimpleStatsEvolution::EvolutionStats new_stats,
+            evo.Evolution(stats, /*row_count=*/4,
+                          /*dense_fields=*/std::vector<std::string>({"key0", 
"key1", "f1", "f5"})));
+
+        auto expected_min =
+            BinaryRowGenerator::GenerateRow({0, 1, NullType(), 
TimestampType(Timestamp(0, 1), 9),
+                                             NullType(), NullType(), 
NullType(), NullType()},
+                                            pool_.get());
+        auto expected_max = BinaryRowGenerator::GenerateRow(
+            {20, 21, NullType(), TimestampType(Timestamp(20, 21), 9), 
NullType(), NullType(),
+             NullType(), NullType()},
+            pool_.get());
+
+        CheckResultRow(*(new_stats.min_values), expected_min, 
schema->Fields());
+        CheckResultRow(*(new_stats.max_values), expected_max, 
schema->Fields());
+        CheckResultArray(*(new_stats.null_counts),
+                         {0l, 0l, NullType(), 1l, NullType(), NullType(), 
NullType(), 4l});
+        ASSERT_EQ(evo.dense_fields_mapping_.Size(), 1);  // NOTE(review): reads a private member; presumably the test build relaxes access control — confirm.
+    }
+    {
+        // second turn, evo with same dense_fields
+        auto min = BinaryRowGenerator::GenerateRow(
+            {0, 1, TimestampType(Timestamp(0, 1), 9), NullType()}, 
pool_.get());
+        auto max = BinaryRowGenerator::GenerateRow(
+            {20, 21, TimestampType(Timestamp(20, 21), 9), NullType()}, 
pool_.get());
+        auto null_count = BinaryArray::FromLongArray({0, 0, 1, 4}, 
pool_.get());
+        SimpleStats stats(min, max, null_count);
+        ASSERT_OK_AND_ASSIGN(
+            SimpleStatsEvolution::EvolutionStats new_stats,
+            evo.Evolution(stats, /*row_count=*/4,
+                          /*dense_fields=*/std::vector<std::string>({"key0", 
"key1", "f1", "f5"})));
+
+        auto expected_min =
+            BinaryRowGenerator::GenerateRow({0, 1, NullType(), 
TimestampType(Timestamp(0, 1), 9),
+                                             NullType(), NullType(), 
NullType(), NullType()},
+                                            pool_.get());
+        auto expected_max = BinaryRowGenerator::GenerateRow(
+            {20, 21, NullType(), TimestampType(Timestamp(20, 21), 9), 
NullType(), NullType(),
+             NullType(), NullType()},
+            pool_.get());
+
+        CheckResultRow(*(new_stats.min_values), expected_min, 
schema->Fields());
+        CheckResultRow(*(new_stats.max_values), expected_max, 
schema->Fields());
+        CheckResultArray(*(new_stats.null_counts),
+                         {0l, 0l, NullType(), 1l, NullType(), NullType(), 
NullType(), 4l});
+        ASSERT_EQ(evo.dense_fields_mapping_.Size(), 1);  // Same name list as the first turn: no new entry expected.
+    }
+    {
+        // third turn, evo with different dense_fields
+        auto min = BinaryRowGenerator::GenerateRow({TimestampType(Timestamp(0, 
1), 9), NullType()},
+                                                   pool_.get());
+        auto max = BinaryRowGenerator::GenerateRow(
+            {TimestampType(Timestamp(20, 21), 9), NullType()}, pool_.get());
+        auto null_count = BinaryArray::FromLongArray({1, 4}, pool_.get());
+        SimpleStats stats(min, max, null_count);
+        ASSERT_OK_AND_ASSIGN(
+            SimpleStatsEvolution::EvolutionStats new_stats,
+            evo.Evolution(stats, /*row_count=*/4,
+                          /*dense_fields=*/std::vector<std::string>({"f1", 
"f5"})));
+
+        auto expected_min = BinaryRowGenerator::GenerateRow(
+            {NullType(), NullType(), NullType(), TimestampType(Timestamp(0, 
1), 9), NullType(),
+             NullType(), NullType(), NullType()},
+            pool_.get());
+        auto expected_max = BinaryRowGenerator::GenerateRow(
+            {NullType(), NullType(), NullType(), TimestampType(Timestamp(20, 
21), 9), NullType(),
+             NullType(), NullType(), NullType()},
+            pool_.get());
+
+        CheckResultRow(*(new_stats.min_values), expected_min, 
schema->Fields());
+        CheckResultRow(*(new_stats.max_values), expected_max, 
schema->Fields());
+        CheckResultArray(*(new_stats.null_counts), {NullType(), NullType(), 
NullType(), 1l,
+                                                    NullType(), NullType(), 
NullType(), 4l});
+        ASSERT_EQ(evo.dense_fields_mapping_.Size(), 2);  // A different name list adds a second entry.
+    }
+}
+
+TEST_F(SimpleStatsEvolutionTest, TestGetFieldMap) {  // Checks both lookup maps, using schema 0 as data schema and schema 1 as table schema.
+    std::string table_root =
+        paimon::test::GetDataDir() +
+        
"orc/append_table_alter_table_with_cast.db/append_table_alter_table_with_cast";
+    SchemaManager manager(fs_, table_root);
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema, 
manager.ReadSchema(/*schema_id=*/0));
+    ASSERT_TRUE(schema);
+
+    ASSERT_OK_AND_ASSIGN(std::shared_ptr<TableSchema> schema1, 
manager.ReadSchema(/*schema_id=*/1));
+    ASSERT_TRUE(schema1);
+
+    SimpleStatsEvolution evo(schema->Fields(), schema1->Fields(), 
/*need_mapping=*/true, pool_);
+
+    std::map<int32_t, std::pair<int32_t, DataField>> 
expected_id_to_data_fields;  // Keys 0..7, each paired with the same number and a field of schema 0.
+    expected_id_to_data_fields[0] = std::make_pair(0, 
schema->GetField("key0").value());
+    expected_id_to_data_fields[1] = std::make_pair(1, 
schema->GetField("key1").value());
+    expected_id_to_data_fields[2] = std::make_pair(2, 
schema->GetField("f0").value());
+    expected_id_to_data_fields[3] = std::make_pair(3, 
schema->GetField("f1").value());
+    expected_id_to_data_fields[4] = std::make_pair(4, 
schema->GetField("f2").value());
+    expected_id_to_data_fields[5] = std::make_pair(5, 
schema->GetField("f3").value());
+    expected_id_to_data_fields[6] = std::make_pair(6, 
schema->GetField("f4").value());
+    expected_id_to_data_fields[7] = std::make_pair(7, 
schema->GetField("f5").value());
+
+    std::map<std::string, DataField> expected_name_to_table_fields;  // Every entry is looked up in schema1.
+    expected_name_to_table_fields["f4"] = schema1->GetField("f4").value();
+    expected_name_to_table_fields["key0"] = schema1->GetField("key0").value();
+    expected_name_to_table_fields["key1"] = schema1->GetField("key1").value();
+    expected_name_to_table_fields["f3"] = schema1->GetField("f3").value();
+    expected_name_to_table_fields["f1"] = schema1->GetField("f1").value();
+    expected_name_to_table_fields["f2"] = schema1->GetField("f2").value();
+    expected_name_to_table_fields["f0"] = schema1->GetField("f0").value();
+    expected_name_to_table_fields["f6"] = schema1->GetField("f6").value();
+
+    ASSERT_EQ(expected_id_to_data_fields, evo.GetFieldIdToDataField());
+    ASSERT_EQ(expected_name_to_table_fields, evo.GetFieldNameToTableField());
+}
+
+}  // namespace paimon::test
diff --git a/src/paimon/core/stats/simple_stats_evolutions.h 
b/src/paimon/core/stats/simple_stats_evolutions.h
new file mode 100644
index 0000000..c72a488
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_evolutions.h
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include "paimon/common/utils/concurrent_hash_map.h"
+#include "paimon/core/schema/table_schema.h"
+#include "paimon/core/stats/simple_stats_evolution.h"
+
+namespace paimon {
+class SimpleStatsEvolutions {  // Keeps one SimpleStatsEvolution per data schema id for a fixed table schema.
+ public:
+    SimpleStatsEvolutions(const std::shared_ptr<TableSchema>& table_schema,  // Stores both shared pointers; no other work is done.
+                          const std::shared_ptr<MemoryPool>& pool)
+        : pool_(pool), table_schema_(table_schema) {}
+
+    std::shared_ptr<SimpleStatsEvolution> GetOrCreate(  // Returns the stored evolution for data_schema's id, building and inserting one on a miss.
+        const std::shared_ptr<TableSchema>& data_schema) {
+        auto data_schema_id = data_schema->Id();  // data_schema is dereferenced without a null check.
+        auto cached_evolution = evolutions_.Find(data_schema_id);
+        if (cached_evolution != std::nullopt) {
+            return cached_evolution.value();
+        }
+        bool need_mapping = data_schema_id != table_schema_->Id();  // Mapping is requested only when the two schema ids differ.
+        auto evolution = std::make_shared<SimpleStatsEvolution>(
+            data_schema->Fields(), table_schema_->Fields(), need_mapping, 
pool_);
+        evolutions_.Insert(data_schema_id, evolution);  // NOTE(review): Find and Insert are separate calls, so concurrent callers may each build an instance — confirm this is acceptable.
+        return evolution;
+    }
+
+ private:
+    std::shared_ptr<MemoryPool> pool_;
+    std::shared_ptr<TableSchema> table_schema_;
+    // schema_id -> evolution
+    ConcurrentHashMap<int64_t, std::shared_ptr<SimpleStatsEvolution>> 
evolutions_;
+};
+}  // namespace paimon
diff --git a/src/paimon/core/stats/simple_stats_test.cpp 
b/src/paimon/core/stats/simple_stats_test.cpp
new file mode 100644
index 0000000..752534c
--- /dev/null
+++ b/src/paimon/core/stats/simple_stats_test.cpp
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "paimon/core/stats/simple_stats.h"
+
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "paimon/testing/utils/binary_row_generator.h"
+#include "paimon/testing/utils/testharness.h"
+namespace paimon::test {
+TEST(SimpleStatsTest, TestToFromRow) {  // Round-trip: ToRow() followed by FromRow() must reproduce each SimpleStats.
+    auto pool = GetDefaultPool();
+    std::vector<SimpleStats> simple_stats_vec = {  // Three cases: a ten-column stats, a single null column, and EmptyStats().
+        BinaryRowGenerator::GenerateStats(
+            {false, static_cast<int8_t>(-2), static_cast<int16_t>(-32768),
+             static_cast<int32_t>(-2147483648), NullType(), 
static_cast<int64_t>(-4294967298),
+             static_cast<float>(0.5), 1.141592659, "20250326", NullType()},
+            {true, static_cast<int8_t>(1), static_cast<int16_t>(32767),
+             static_cast<int32_t>(2147483647), NullType(), 
static_cast<int64_t>(4294967296),
+             static_cast<float>(2.0), 3.141592657, "20250327", NullType()},
+            std::vector<int64_t>({1, 0, 0, 1, 4, 1, 0, 0, 1, 0}), pool.get()),
+        BinaryRowGenerator::GenerateStats({NullType()}, {NullType()}, {1}, 
pool.get()),
+        SimpleStats::EmptyStats(),
+    };
+    for (const auto& stats : simple_stats_vec) {
+        auto row = stats.ToRow();
+        ASSERT_OK_AND_ASSIGN(auto result_stats, SimpleStats::FromRow(&row, 
pool.get()));
+        ASSERT_EQ(result_stats, stats);
+        ASSERT_EQ(result_stats.ToString(), stats.ToString());  // The string form must match as well.
+    }
+}
+
+}  // namespace paimon::test

Reply via email to