This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 980ad60a66b branch-3.1: [fix](variant) enhance max_sparse_column_statistics_size for variant #55124 (#55752)
980ad60a66b is described below

commit 980ad60a66baee21801a93b7e50523bf0967b470
Author: amory <[email protected]>
AuthorDate: Wed Sep 10 16:47:49 2025 +0800

    branch-3.1: [fix](variant) enhance max_sparse_column_statistics_size for variant #55124 (#55752)
    
    picked from #55124
---
 be/src/common/config.cpp                           |  2 -
 be/src/common/config.h                             |  3 -
 be/src/common/consts.h                             |  1 +
 .../segment_v2/variant/variant_column_reader.cpp   | 26 ++++++--
 .../segment_v2/variant/variant_column_reader.h     |  3 +
 .../segment_v2/variant_column_writer_impl.cpp      |  2 +-
 .../rowset/segment_v2/variant_stats_calculator.cpp | 16 ++++-
 .../rowset/segment_v2/variant_stats_calculator.h   |  4 +-
 be/src/olap/tablet_meta.cpp                        |  4 ++
 be/src/olap/tablet_schema.cpp                      |  6 ++
 be/src/olap/tablet_schema.h                        | 13 ++++
 be/src/vec/common/schema_util.cpp                  | 18 +++---
 be/src/vec/common/schema_util.h                    |  3 +-
 be/test/olap/rowset/segment_v2/mock/mock_segment.h |  3 +
 .../variant_column_writer_reader_test.cpp          |  8 +--
 .../segment_v2/variant_stats_calculator_test.cpp   | 41 +++++++++----
 be/test/testutil/schema_utils.h                    |  1 +
 be/test/vec/common/schema_util_test.cpp            | 23 ++++---
 .../java/org/apache/doris/catalog/ScalarType.java  |  9 +++
 .../java/org/apache/doris/catalog/VariantType.java | 24 +++++++-
 .../main/java/org/apache/doris/catalog/Column.java |  6 ++
 .../apache/doris/common/util/PropertyAnalyzer.java | 23 +++++++
 .../doris/nereids/parser/LogicalPlanBuilder.java   | 10 ++-
 .../org/apache/doris/nereids/types/DataType.java   |  3 +-
 .../apache/doris/nereids/types/VariantType.java    | 27 ++++++--
 .../java/org/apache/doris/qe/SessionVariable.java  | 14 +++++
 .../apache/doris/common/PropertyAnalyzerTest.java  | 28 +++++++++
 .../org/apache/doris/persist/ScalarTypeTest.java   |  1 +
 gensrc/proto/olap_file.proto                       |  2 +
 gensrc/thrift/Descriptors.thrift                   |  1 +
 .../cloud_p0/conf/regression-conf-custom.groovy    |  1 +
 .../pipeline/p0/conf/regression-conf.groovy        |  3 +-
 ...est_variant_compaction_with_sparse_limit.groovy | 71 ++++++++++++++++++----
 ...est_variant_compaction_with_sparse_limit.groovy | 21 +++----
 34 files changed, 337 insertions(+), 84 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index c9e63c54de6..c2f69661741 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1338,8 +1338,6 @@ DEFINE_Bool(enable_snapshot_action, "false");
 
 DEFINE_mInt32(variant_max_merged_tablet_schema_size, "2048");
 
-DEFINE_mInt32(variant_max_sparse_column_statistics_size, "10000");
-
 DEFINE_mBool(enable_column_type_check, "true");
 // 128 MB
 DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 6f214361524..8784bd71f1d 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1410,9 +1410,6 @@ DECLARE_Bool(enable_snapshot_action);
 // The max columns size for a tablet schema
 DECLARE_mInt32(variant_max_merged_tablet_schema_size);
 
-// The max sparse column statistics size for a variant column
-DECLARE_mInt32(variant_max_sparse_column_statistics_size);
-
 DECLARE_mInt64(local_exchange_buffer_mem_limit);
 
 DECLARE_mInt64(enable_debug_log_timeout_secs);
diff --git a/be/src/common/consts.h b/be/src/common/consts.h
index 2ec9ae12679..32b4b1e7fa4 100644
--- a/be/src/common/consts.h
+++ b/be/src/common/consts.h
@@ -46,5 +46,6 @@ static constexpr int MAX_DECIMALV2_SCALE = 9;
 
 static constexpr int MAX_DECIMALV3_PRECISION = MAX_DECIMAL256_PRECISION;
 static constexpr int MAX_DECIMALV3_SCALE = MAX_DECIMALV3_PRECISION;
+static constexpr int DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE = 10000;
 } // namespace BeConsts
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp 
b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
index ec0079edc64..f9a2b21958e 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.cpp
@@ -74,9 +74,20 @@ bool VariantColumnReader::exist_in_sparse_column(
 }
 
 bool VariantColumnReader::is_exceeded_sparse_column_limit() const {
-    return !_statistics->sparse_column_non_null_size.empty() &&
-           _statistics->sparse_column_non_null_size.size() >=
-                   config::variant_max_sparse_column_statistics_size;
+    bool exceeded_sparse_column_limit = 
!_statistics->sparse_column_non_null_size.empty() &&
+                                        
_statistics->sparse_column_non_null_size.size() >=
+                                                
_variant_sparse_column_statistics_size;
+    DBUG_EXECUTE_IF("exceeded_sparse_column_limit_must_be_false", {
+        if (exceeded_sparse_column_limit) {
+            throw doris::Exception(
+                    ErrorCode::INTERNAL_ERROR,
+                    "exceeded_sparse_column_limit_must_be_false, 
sparse_column_non_null_size: {} : "
+                    " _variant_sparse_column_statistics_size: {}",
+                    _statistics->sparse_column_non_null_size.size(),
+                    _variant_sparse_column_statistics_size);
+        }
+    })
+    return exceeded_sparse_column_limit;
 }
 
 int64_t VariantColumnReader::get_metadata_size() const {
@@ -308,9 +319,7 @@ Status 
VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator,
 
     // Otherwise the prefix is not exist and the sparse column size is reached 
limit
     // which means the path maybe exist in sparse_column
-    bool exceeded_sparse_column_limit = 
!_statistics->sparse_column_non_null_size.empty() &&
-                                        
_statistics->sparse_column_non_null_size.size() >=
-                                                
config::variant_max_sparse_column_statistics_size;
+    bool exceeded_sparse_column_limit = is_exceeded_sparse_column_limit();
 
     // If the variant column has extracted columns and is a compaction reader, 
then read flat leaves
     // Otherwise read hierarchical data, since the variant subcolumns are 
flattened in schema_util::get_compaction_schema
@@ -391,6 +400,11 @@ Status VariantColumnReader::init(const 
ColumnReaderOptions& opts, const SegmentF
     _statistics = std::make_unique<VariantStatistics>();
     const ColumnMetaPB& self_column_pb = footer.columns(column_id);
     const auto& parent_index = 
opts.tablet_schema->inverted_indexs(self_column_pb.unique_id());
+    // record variant_sparse_column_statistics_size from parent column
+    _variant_sparse_column_statistics_size =
+            opts.tablet_schema->column_by_uid(self_column_pb.unique_id())
+                    .variant_max_sparse_column_statistics_size();
+
     for (int32_t ordinal = 0; ordinal < footer.columns_size(); ++ordinal) {
         const ColumnMetaPB& column_pb = footer.columns(ordinal);
         // Find all columns belonging to the current variant column
diff --git a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h 
b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h
index 21edf5c50bd..f22809eed52 100644
--- a/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h
+++ b/be/src/olap/rowset/segment_v2/variant/variant_column_reader.h
@@ -116,6 +116,9 @@ private:
     std::unique_ptr<VariantStatistics> _statistics;
     // key: subcolumn path, value: subcolumn indexes
     std::unordered_map<std::string, TabletIndexes> _variant_subcolumns_indexes;
+    // variant_sparse_column_statistics_size
+    size_t _variant_sparse_column_statistics_size =
+            BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE;
 };
 
 class VariantRootColumnIterator : public ColumnIterator {
diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp 
b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index 47890f75d04..6cb59d186da 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -349,7 +349,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
             it != sparse_data_paths_statistics.end()) {
             ++it->second;
         } else if (sparse_data_paths_statistics.size() <
-                   config::variant_max_sparse_column_statistics_size) {
+                   
_tablet_column->variant_max_sparse_column_statistics_size()) {
             sparse_data_paths_statistics.emplace(path, 1);
         }
     }
diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp 
b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp
index aef71372666..168efa547ec 100644
--- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_stats_calculator.cpp
@@ -17,6 +17,8 @@
 
 #include "olap/rowset/segment_v2/variant_stats_calculator.h"
 
+#include <gen_cpp/segment_v2.pb.h>
+
 #include "common/logging.h"
 #include "util/simd/bits.h"
 #include "vec/columns/column_nullable.h"
@@ -67,7 +69,13 @@ Status VariantStatsCaculator::calculate_variant_stats(const 
vectorized::Block* b
             // Check if this is a sparse column or sub column
             if (column_path.ends_with("__DORIS_VARIANT_SPARSE__")) {
                 // This is a sparse column from variant column
-                _calculate_sparse_column_stats(*column, column_meta, row_pos, 
num_rows);
+                // get variant_max_sparse_column_statistics_size from 
tablet_schema
+                size_t variant_max_sparse_column_statistics_size =
+                        
_tablet_schema->column_by_uid(tablet_column.parent_unique_id())
+                                .variant_max_sparse_column_statistics_size();
+                _calculate_sparse_column_stats(*column, column_meta,
+                                               
variant_max_sparse_column_statistics_size, row_pos,
+                                               num_rows);
             } else {
                 // This is a sub column from variant column
                 _calculate_sub_column_stats(*column, column_meta, row_pos, 
num_rows);
@@ -79,12 +87,14 @@ Status VariantStatsCaculator::calculate_variant_stats(const 
vectorized::Block* b
 
 void VariantStatsCaculator::_calculate_sparse_column_stats(const 
vectorized::IColumn& column,
                                                            ColumnMetaPB* 
column_meta,
+                                                           size_t 
max_sparse_column_statistics_size,
                                                            size_t row_pos, 
size_t num_rows) {
     // Get or create variant statistics
     VariantStatisticsPB* stats = column_meta->mutable_variant_statistics();
 
     // Use the same logic as the original calculate_variant_stats function
-    vectorized::schema_util::calculate_variant_stats(column, stats, row_pos, 
num_rows);
+    vectorized::schema_util::calculate_variant_stats(
+            column, stats, max_sparse_column_statistics_size, row_pos, 
num_rows);
 
     VLOG_DEBUG << "Sparse column stats updated, non-null size count: "
                << stats->sparse_column_non_null_size_size();
@@ -108,4 +118,4 @@ void 
VariantStatsCaculator::_calculate_sub_column_stats(const vectorized::IColum
                << " (added " << current_non_null_count << " from current 
block)";
 }
 
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h 
b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h
index 6ffd74036cb..221c45b781d 100644
--- a/be/src/olap/rowset/segment_v2/variant_stats_calculator.h
+++ b/be/src/olap/rowset/segment_v2/variant_stats_calculator.h
@@ -45,7 +45,9 @@ private:
 
     // Helper method to calculate sparse column statistics
     void _calculate_sparse_column_stats(const vectorized::IColumn& column,
-                                        ColumnMetaPB* column_meta, size_t 
row_pos, size_t num_rows);
+                                        ColumnMetaPB* column_meta,
+                                        size_t 
max_sparse_column_statistics_size, size_t row_pos,
+                                        size_t num_rows);
 
     // Helper method to calculate sub column statistics
     void _calculate_sub_column_stats(const vectorized::IColumn& column, 
ColumnMetaPB* column_meta,
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 3352460fddb..0a246482788 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -491,6 +491,10 @@ void TabletMeta::init_column_from_tcolumn(uint32_t 
unique_id, const TColumn& tco
         column->set_variant_enable_typed_paths_to_sparse(
                 tcolumn.variant_enable_typed_paths_to_sparse);
     }
+    if (tcolumn.__isset.variant_max_sparse_column_statistics_size) {
+        column->set_variant_max_sparse_column_statistics_size(
+                tcolumn.variant_max_sparse_column_statistics_size);
+    }
 }
 
 void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const 
Version& version) {
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 0aa95b1bbeb..255e15546af 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -613,6 +613,10 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
     if (column.has_variant_max_subcolumns_count()) {
         _variant_max_subcolumns_count = column.variant_max_subcolumns_count();
     }
+    if (column.has_variant_max_sparse_column_statistics_size()) {
+        _variant_max_sparse_column_statistics_size =
+                column.variant_max_sparse_column_statistics_size();
+    }
     if (column.has_pattern_type()) {
         _pattern_type = column.pattern_type();
     }
@@ -704,6 +708,8 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
     column->set_variant_max_subcolumns_count(_variant_max_subcolumns_count);
     column->set_pattern_type(_pattern_type);
     
column->set_variant_enable_typed_paths_to_sparse(_variant_enable_typed_paths_to_sparse);
+    column->set_variant_max_sparse_column_statistics_size(
+            _variant_max_sparse_column_statistics_size);
 }
 
 void TabletColumn::add_sub_column(TabletColumn& sub_column) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index ac8da118734..ad4897adc9b 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -224,6 +224,12 @@ public:
     void set_variant_max_subcolumns_count(int32_t 
variant_max_subcolumns_count) {
         _variant_max_subcolumns_count = variant_max_subcolumns_count;
     }
+
+    void set_variant_max_sparse_column_statistics_size(
+            int32_t variant_max_sparse_column_statistics_size) {
+        _variant_max_sparse_column_statistics_size = 
variant_max_sparse_column_statistics_size;
+    }
+
     int32_t variant_max_subcolumns_count() const { return 
_variant_max_subcolumns_count; }
 
     void set_variant_enable_typed_paths_to_sparse(bool 
variant_enable_typed_paths_to_sparse) {
@@ -234,6 +240,10 @@ public:
         return _variant_enable_typed_paths_to_sparse;
     }
 
+    int32_t variant_max_sparse_column_statistics_size() const {
+        return _variant_max_sparse_column_statistics_size;
+    }
+
 private:
     int32_t _unique_id = -1;
     std::string _col_name;
@@ -286,6 +296,9 @@ private:
     int32_t _variant_max_subcolumns_count = 0;
     PatternTypePB _pattern_type = PatternTypePB::MATCH_NAME_GLOB;
     bool _variant_enable_typed_paths_to_sparse = false;
+    // set variant_max_sparse_column_statistics_size
+    int32_t _variant_max_sparse_column_statistics_size =
+            BeConsts::DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATS_SIZE;
 };
 
 bool operator==(const TabletColumn& a, const TabletColumn& b);
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 68066cc840d..aa21db80d29 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -889,7 +889,9 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& 
intputs, RowsetShare
 
         // In input rowsets, some rowsets may have statistics values exceeding 
the maximum limit,
         // which leads to inaccurate statistics
-        if (stats.size() > config::variant_max_sparse_column_statistics_size) {
+        if (stats.size() > output->tablet_schema()
+                                   ->column_by_uid(uid)
+                                   
.variant_max_sparse_column_statistics_size()) {
             // When there is only one segment, we can ensure that the size of 
each path in output stats is accurate
             if (output->num_segments() == 1) {
                 for (const auto& [path, size] : stats) {
@@ -1014,7 +1016,8 @@ void 
get_compaction_subcolumns(TabletSchema::PathsSetInfo& paths_set_info,
             VLOG_DEBUG << "append typed column " << subpath;
         } else if (find_data_types == path_to_data_types.end() || 
find_data_types->second.empty() ||
                    sparse_paths.find(std::string(subpath)) != 
sparse_paths.end() ||
-                   sparse_paths.size() >= 
config::variant_max_sparse_column_statistics_size) {
+                   sparse_paths.size() >=
+                           
parent_column->variant_max_sparse_column_statistics_size()) {
             TabletColumn subcolumn;
             subcolumn.set_name(column_name);
             subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
@@ -1111,7 +1114,8 @@ Status get_compaction_schema(const 
std::vector<RowsetSharedPtr>& rowsets,
 
 // Calculate statistics about variant data paths from the encoded sparse column
 void calculate_variant_stats(const IColumn& encoded_sparse_column,
-                             segment_v2::VariantStatisticsPB* stats, size_t 
row_pos,
+                             segment_v2::VariantStatisticsPB* stats,
+                             size_t max_sparse_column_statistics_size, size_t 
row_pos,
                              size_t num_rows) {
     // Cast input column to ColumnMap type since sparse column is stored as a 
map
     const auto& map_column = assert_cast<const 
ColumnMap&>(encoded_sparse_column);
@@ -1136,19 +1140,17 @@ void calculate_variant_stats(const IColumn& 
encoded_sparse_column,
             }
             // If path doesn't exist and we haven't hit the max statistics 
size limit,
             // add it with count 1
-            else if (count_map.size() < 
config::variant_max_sparse_column_statistics_size) {
+            else if (count_map.size() < max_sparse_column_statistics_size) {
                 count_map.emplace(sparse_path, 1);
             }
         }
     }
 
-    if (stats->sparse_column_non_null_size().size() >
-        config::variant_max_sparse_column_statistics_size) {
+    if (stats->sparse_column_non_null_size().size() > 
max_sparse_column_statistics_size) {
         throw doris::Exception(
                 ErrorCode::INTERNAL_ERROR,
                 "Sparse column non null size: {} is greater than max 
statistics size: {}",
-                stats->sparse_column_non_null_size().size(),
-                config::variant_max_sparse_column_statistics_size);
+                stats->sparse_column_non_null_size().size(), 
max_sparse_column_statistics_size);
     }
 }
 
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index fc5698bf966..ab7fcec2b15 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -172,7 +172,8 @@ Status check_path_stats(const std::vector<RowsetSharedPtr>& 
intputs, RowsetShare
 
 // Calculate statistics about variant data paths from the encoded sparse column
 void calculate_variant_stats(const IColumn& encoded_sparse_column,
-                             segment_v2::VariantStatisticsPB* stats, size_t 
row_pos,
+                             segment_v2::VariantStatisticsPB* stats,
+                             size_t max_sparse_column_statistics_size, size_t 
row_pos,
                              size_t num_rows);
 
 void get_field_info(const Field& field, FieldInfo* info);
diff --git a/be/test/olap/rowset/segment_v2/mock/mock_segment.h 
b/be/test/olap/rowset/segment_v2/mock/mock_segment.h
index 9cf443b2df0..7efe16e7586 100644
--- a/be/test/olap/rowset/segment_v2/mock/mock_segment.h
+++ b/be/test/olap/rowset/segment_v2/mock/mock_segment.h
@@ -49,6 +49,9 @@ public:
 
     // Helper methods for test setup
     void add_column_uid_mapping(int32_t col_uid, int32_t footer_ordinal) {
+        _tablet_schema->_cols.push_back(std::make_shared<TabletColumn>());
+        _tablet_schema->_cols.back()->set_unique_id(col_uid);
+        _tablet_schema->_field_id_to_index[col_uid] = footer_ordinal;
         _column_uid_to_footer_ordinal[col_uid] = footer_ordinal;
     }
 
diff --git 
a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 8495e7c4e0b..13ca9ebf4ab 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -483,15 +483,15 @@ TEST_F(VariantColumnWriterReaderTest, 
test_write_data_normal) {
     // 13. check statistics size == limit
     auto& variant_stats = variant_column_reader->_statistics;
     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
-                config::variant_max_sparse_column_statistics_size);
-    auto limit = config::variant_max_sparse_column_statistics_size -
+                variant_column_reader->_variant_sparse_column_statistics_size);
+    auto limit = variant_column_reader->_variant_sparse_column_statistics_size 
-
                  variant_stats->sparse_column_non_null_size.size();
     for (int i = 0; i < limit; ++i) {
         std::string key = parent_column.name_lower_case() + ".key10" + 
std::to_string(i);
         variant_stats->sparse_column_non_null_size[key] = 10000;
     }
     EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
-                config::variant_max_sparse_column_statistics_size);
+                variant_column_reader->_variant_sparse_column_statistics_size);
     EXPECT_TRUE(variant_column_reader->is_exceeded_sparse_column_limit());
 
     ColumnIteratorUPtr it2;
@@ -2500,4 +2500,4 @@ TEST_F(VariantColumnWriterReaderTest, 
test_read_with_checksum) {
     }
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp 
b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp
index edbda054825..5fbb2ed514d 100644
--- a/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_stats_calculator_test.cpp
@@ -71,8 +71,10 @@ protected:
     }
 
     // Helper method to create a footer column with path info
-    void add_footer_column_with_path(int32_t parent_unique_id, const 
std::string& path) {
+    void add_footer_column_with_path(int32_t parent_unique_id, const 
std::string& path,
+                                     uint32_t column_id = 0) {
         auto* column_meta = _footer->add_columns();
+        column_meta->set_column_id(column_id);
         column_meta->set_unique_id(100 + _footer->columns_size());
 
         auto* path_info = column_meta->mutable_column_path_info();
@@ -202,19 +204,26 @@ TEST_F(VariantStatsCalculatorTest, 
CalculateVariantStatsWithSubColumn) {
 
 TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithSparseColumn) {
     // Setup footer with sparse column
-    add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__");
+    add_footer_column_with_path(-1, "sparse_col");
+    add_footer_column_with_path(1, "sparse_col.__DORIS_VARIANT_SPARSE__", 1);
 
     // Create variant sparse column
+    TabletColumn parent_column = create_variant_column(1, "variant_col", -1, 
"sparse_col");
     TabletColumn sparse_column = create_variant_column(2, 
"variant_col.__DORIS_VARIANT_SPARSE__", 1,
                                                        
"sparse_col.__DORIS_VARIANT_SPARSE__");
+    _tablet_schema->append_column(parent_column);
     _tablet_schema->append_column(sparse_column);
 
-    std::vector<uint32_t> column_ids = {0};
+    std::vector<uint32_t> column_ids = {0, 1};
     VariantStatsCaculator calculator(_footer.get(), _tablet_schema, 
column_ids);
 
     // Create block with map column (sparse column)
     vectorized::Block block;
     auto map_column = create_map_column();
+    auto string_column = vectorized::ColumnString::create();
+    // add parent column to block
+    block.insert({std::move(string_column), 
std::make_shared<vectorized::DataTypeString>(),
+                  "variant_column"});
     block.insert({std::move(map_column),
                   std::make_shared<vectorized::DataTypeMap>(
                           std::make_shared<vectorized::DataTypeString>(),
@@ -225,7 +234,7 @@ TEST_F(VariantStatsCalculatorTest, 
CalculateVariantStatsWithSparseColumn) {
     EXPECT_TRUE(status.ok());
 
     // Check that variant statistics were updated
-    auto& column_meta = _footer->columns(0);
+    auto& column_meta = _footer->columns(1);
     EXPECT_TRUE(column_meta.has_variant_statistics());
 }
 
@@ -275,10 +284,15 @@ TEST_F(VariantStatsCalculatorTest, 
CalculateVariantStatsWithMissingPathInFooter)
 }
 
 TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithMultipleColumns) {
+    // parent column
+    add_footer_column_with_path(-1, "variant");
+    TabletColumn parent_column = create_variant_column(1, "variant", -1, 
"variant");
+    _tablet_schema->append_column(parent_column);
+
     // Setup footer with multiple columns
-    add_footer_column_with_path(1, "sub1");
-    add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__");
-    add_footer_column_with_path(2, "another_sub");
+    add_footer_column_with_path(1, "sub1", 1);
+    add_footer_column_with_path(1, "sub2.__DORIS_VARIANT_SPARSE__", 2);
+    add_footer_column_with_path(2, "another_sub", 3);
 
     // Create multiple variant columns
     TabletColumn sub1 = create_variant_column(2, "variant.sub1", 1, "sub1");
@@ -290,12 +304,17 @@ TEST_F(VariantStatsCalculatorTest, 
CalculateVariantStatsWithMultipleColumns) {
     _tablet_schema->append_column(sparse);
     _tablet_schema->append_column(sub2);
 
-    std::vector<uint32_t> column_ids = {0, 1, 2};
+    std::vector<uint32_t> column_ids = {0, 1, 2, 3};
     VariantStatsCaculator calculator(_footer.get(), _tablet_schema, 
column_ids);
 
     // Create block with multiple columns
     vectorized::Block block;
 
+    // parent column
+    auto string_column = vectorized::ColumnString::create();
+    string_column->insert_data("test", 4);
+    block.insert({std::move(string_column), 
std::make_shared<vectorized::DataTypeString>(),
+                  "variant_column"});
     auto nullable_col1 = create_nullable_column({false, true, false}, {"a", 
"", "c"});
     block.insert({std::move(nullable_col1),
                   std::make_shared<vectorized::DataTypeNullable>(
@@ -320,9 +339,9 @@ TEST_F(VariantStatsCalculatorTest, 
CalculateVariantStatsWithMultipleColumns) {
     EXPECT_TRUE(status.ok());
 
     // Check that statistics were updated for sub columns
-    EXPECT_EQ(_footer->columns(0).none_null_size(), 2);        // sub1: 2 
non-null
-    EXPECT_TRUE(_footer->columns(1).has_variant_statistics()); // sparse column
-    EXPECT_EQ(_footer->columns(2).none_null_size(), 1);        // another_sub: 
2 non-null
+    EXPECT_EQ(_footer->columns(1).none_null_size(), 2);        // sub1: 2 
non-null
+    EXPECT_TRUE(_footer->columns(2).has_variant_statistics()); // sparse column
+    EXPECT_EQ(_footer->columns(3).none_null_size(), 1);        // another_sub: 
2 non-null
 }
 
 TEST_F(VariantStatsCalculatorTest, CalculateVariantStatsWithEmptyBlock) {
diff --git a/be/test/testutil/schema_utils.h b/be/test/testutil/schema_utils.h
index 400d3fcd652..f294a86c462 100644
--- a/be/test/testutil/schema_utils.h
+++ b/be/test/testutil/schema_utils.h
@@ -34,6 +34,7 @@ public:
         column_pb->set_is_nullable(is_nullable);
         if (column_type == "VARIANT") {
             
column_pb->set_variant_max_subcolumns_count(variant_max_subcolumns_count);
+            column_pb->set_variant_max_sparse_column_statistics_size(10000);
         }
     }
 
diff --git a/be/test/vec/common/schema_util_test.cpp 
b/be/test/vec/common/schema_util_test.cpp
index 5dcdf53df06..cb6e38cf007 100644
--- a/be/test/vec/common/schema_util_test.cpp
+++ b/be/test/vec/common/schema_util_test.cpp
@@ -343,7 +343,9 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) {
             construct_column_map_with_random_values(column_map, 200, 100, 
"key_");
 
     // calculate stats
-    schema_util::calculate_variant_stats(*column_map, &stats, 0, 200);
+    size_t max_sparse_column_statistics_size = 10000;
+    schema_util::calculate_variant_stats(*column_map, &stats, 
max_sparse_column_statistics_size, 0,
+                                         200);
     EXPECT_EQ(stats.sparse_column_non_null_size_size(), 
key_value_counts.size());
 
     for (const auto& kv : key_value_counts) {
@@ -356,7 +358,8 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) {
     column_map->clear();
     const auto& key_value_counts2 =
             construct_column_map_with_random_values(column_map, 3000, 100, 
"key_");
-    schema_util::calculate_variant_stats(*column_map, &stats, 0, 3000);
+    schema_util::calculate_variant_stats(*column_map, &stats, 
max_sparse_column_statistics_size, 0,
+                                         3000);
     EXPECT_EQ(stats.sparse_column_non_null_size_size(), 3000);
 
     for (const auto& [path, size] : stats.sparse_column_non_null_size()) {
@@ -372,11 +375,10 @@ TEST_F(SchemaUtilTest, calculate_variant_stats) {
     // test with max size
     column_map->clear();
     const auto& key_value_counts3 = construct_column_map_with_random_values(
-            column_map, config::variant_max_sparse_column_statistics_size, 5, 
"key2_");
-    schema_util::calculate_variant_stats(*column_map, &stats, 0,
-                                         
config::variant_max_sparse_column_statistics_size);
-    EXPECT_EQ(config::variant_max_sparse_column_statistics_size,
-              stats.sparse_column_non_null_size_size());
+            column_map, max_sparse_column_statistics_size, 5, "key2_");
+    schema_util::calculate_variant_stats(*column_map, &stats, 
max_sparse_column_statistics_size, 0,
+                                         max_sparse_column_statistics_size);
+    EXPECT_EQ(max_sparse_column_statistics_size, 
stats.sparse_column_non_null_size_size());
 
     for (const auto& [path, size] : stats.sparse_column_non_null_size()) {
         auto first_size = key_value_counts.find(path) == key_value_counts.end()
@@ -1686,7 +1688,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) {
     variant.set_unique_id(30);
     variant.set_variant_max_subcolumns_count(3);
     
variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE);
-
+    variant.set_variant_max_sparse_column_statistics_size(10000);
     TabletSchemaSPtr schema = std::make_shared<TabletSchema>();
     schema->append_column(variant);
 
@@ -1743,7 +1745,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns) {
     output_schema = std::make_shared<TabletSchema>();
     sparse_paths.clear();
 
-    for (int i = 0; i < config::variant_max_sparse_column_statistics_size + 1; 
++i) {
+    for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 
1; ++i) {
         sparse_paths.insert("dummy" + std::to_string(i));
     }
     schema_util::get_compaction_subcolumns(paths_set_info, parent_column, 
schema,
@@ -1760,6 +1762,7 @@ TEST_F(SchemaUtilTest, 
get_compaction_subcolumns_advanced) {
     variant.set_variant_max_subcolumns_count(3);
     
variant.set_aggregation_method(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE);
     variant.set_variant_enable_typed_paths_to_sparse(true);
+    variant.set_variant_max_sparse_column_statistics_size(10000);
     TabletColumn subcolumn;
     subcolumn.set_name("c");
     subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_DATEV2);
@@ -1835,7 +1838,7 @@ TEST_F(SchemaUtilTest, 
get_compaction_subcolumns_advanced) {
     output_schema = std::make_shared<TabletSchema>();
     sparse_paths.clear();
 
-    for (int i = 0; i < config::variant_max_sparse_column_statistics_size + 1; 
++i) {
+    for (int i = 0; i < variant.variant_max_sparse_column_statistics_size() + 
1; ++i) {
         sparse_paths.insert("dummy" + std::to_string(i));
     }
     schema_util::get_compaction_subcolumns(paths_set_info, parent_column, 
schema,
diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
index 53790fbaa4f..e04c3c99b1a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
@@ -1235,4 +1235,13 @@ public class ScalarType extends Type {
         }
         return false;
     }
+
+    public int getVariantMaxSparseColumnStatisticsSize() {
+        // In the past, variant metadata used the ScalarType type.
+        // Now, we use VariantType, which inherits from ScalarType, as the new 
metadata storage.
+        if (this instanceof VariantType) {
+            return ((VariantType) 
this).getVariantMaxSparseColumnStatisticsSize();
+        }
+        return 0; // The old variant type had a default value of 0.
+    }
 }
diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java 
b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
index 29342d73ca7..e2a3d76da7f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/VariantType.java
@@ -46,6 +46,9 @@ public class VariantType extends ScalarType {
     @SerializedName(value = "enableTypedPathsToSparse")
     private boolean enableTypedPathsToSparse = false;
 
+    @SerializedName(value = "variantMaxSparseColumnStatisticsSize")
+    private int variantMaxSparseColumnStatisticsSize = 10000;
+
     private Map<String, String> properties = Maps.newHashMap();
 
     public VariantType() {
@@ -53,6 +56,7 @@ public class VariantType extends ScalarType {
         this.predefinedFields = Lists.newArrayList();
         this.variantMaxSubcolumnsCount = 0;
         this.enableTypedPathsToSparse = false;
+        this.variantMaxSparseColumnStatisticsSize = 10000;
     }
 
     public VariantType(ArrayList<VariantField> fields) {
@@ -81,7 +85,8 @@ public class VariantType extends ScalarType {
     }
 
     public VariantType(ArrayList<VariantField> fields, int 
variantMaxSubcolumnsCount,
-                                                        boolean 
enableTypedPathsToSparse) {
+                                                        boolean 
enableTypedPathsToSparse,
+                                                        int 
variantMaxSparseColumnStatisticsSize) {
         super(PrimitiveType.VARIANT);
         Preconditions.checkNotNull(fields);
         this.predefinedFields = fields;
@@ -90,6 +95,7 @@ public class VariantType extends ScalarType {
         }
         this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount;
         this.enableTypedPathsToSparse = enableTypedPathsToSparse;
+        this.variantMaxSparseColumnStatisticsSize = 
variantMaxSparseColumnStatisticsSize;
     }
 
     @Override
@@ -103,7 +109,8 @@ public class VariantType extends ScalarType {
         if (!predefinedFields.isEmpty()) {
             sb.append(predefinedFields.stream()
                                 .map(variantField -> 
variantField.toSql(depth)).collect(Collectors.joining(",")));
-            if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse) {
+            if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse
+                    && variantMaxSparseColumnStatisticsSize == 10000) {
                 sb.append(">");
                 return sb.toString();
             } else {
@@ -123,6 +130,11 @@ public class VariantType extends ScalarType {
             sb.append("\"variant_enable_typed_paths_to_sparse\" = \"")
                                     
.append(String.valueOf(enableTypedPathsToSparse)).append("\"");
         }
+        if (variantMaxSparseColumnStatisticsSize != 10000) {
+            sb.append(",");
+            sb.append("\"variant_max_sparse_column_statistics_size\" = \"")
+                                    
.append(String.valueOf(variantMaxSparseColumnStatisticsSize)).append("\"");
+        }
         sb.append(")>");
         return sb.toString();
     }
@@ -188,4 +200,12 @@ public class VariantType extends ScalarType {
     public void setEnableTypedPathsToSparse(boolean enableTypedPathsToSparse) {
         this.enableTypedPathsToSparse = enableTypedPathsToSparse;
     }
+
+    public int getVariantMaxSparseColumnStatisticsSize() {
+        return variantMaxSparseColumnStatisticsSize;
+    }
+
+    public void setVariantMaxSparseColumnStatisticsSize(int 
variantMaxSparseColumnStatisticsSize) {
+        this.variantMaxSparseColumnStatisticsSize = 
variantMaxSparseColumnStatisticsSize;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index ab6b423d976..01993b3714e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -683,6 +683,7 @@ public class Column implements GsonPostProcessable {
         }
         tColumn.setClusterKeyId(this.clusterKeyId);
         
tColumn.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse());
+        
tColumn.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize());
         // ATTN:
         // Currently, this `toThrift()` method is only used from 
CreateReplicaTask.
         // And CreateReplicaTask does not need `defineExpr` field.
@@ -899,6 +900,7 @@ public class Column implements GsonPostProcessable {
         } else if (this.type.isVariantType()) {
             
builder.setVariantMaxSubcolumnsCount(this.getVariantMaxSubcolumnsCount());
             
builder.setVariantEnableTypedPathsToSparse(this.getVariantEnableTypedPathsToSparse());
+            
builder.setVariantMaxSparseColumnStatisticsSize(this.getVariantMaxSparseColumnStatisticsSize());
             // variant may contain predefined structured fields
             addChildren(builder);
         }
@@ -1290,6 +1292,10 @@ public class Column implements GsonPostProcessable {
         this.realDefaultValue = refColumn.realDefaultValue;
     }
 
+    public int getVariantMaxSparseColumnStatisticsSize() {
+        return type.isVariantType() ? ((ScalarType) 
type).getVariantMaxSparseColumnStatisticsSize() : -1;
+    }
+
     public String getExtraInfo() {
         return extraInfo;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
index 35664e768e0..e50a40304be 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
@@ -258,6 +258,9 @@ public class PropertyAnalyzer {
     public static final String SM4 = "SM4";
     public static final String PLAINTEXT = "PLAINTEXT";
 
+    public static final String 
PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE =
+            "variant_max_sparse_column_statistics_size";
+
     public enum RewriteType {
         PUT,      // always put property
         REPLACE,  // replace if exists property
@@ -1876,6 +1879,26 @@ public class PropertyAnalyzer {
         return enableTypedPathsToSparse;
     }
 
+    public static int analyzeVariantMaxSparseColumnStatisticsSize(Map<String, 
String> properties, int defuatValue)
+                                                                               
 throws AnalysisException {
+        int maxSparseColumnStatisticsSize = defuatValue;
+        if (properties != null && 
properties.containsKey(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE)) {
+            String maxSparseColumnStatisticsSizeStr =
+                    
properties.get(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE);
+            try {
+                maxSparseColumnStatisticsSize = 
Integer.parseInt(maxSparseColumnStatisticsSizeStr);
+                if (maxSparseColumnStatisticsSize < 0 || 
maxSparseColumnStatisticsSize > 50000) {
+                    throw new 
AnalysisException("variant_max_sparse_column_statistics_size must between 0 and 
50000 ");
+                }
+            } catch (Exception e) {
+                throw new 
AnalysisException("variant_max_sparse_column_statistics_size format error:" + 
e.getMessage());
+            }
+
+            
properties.remove(PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE);
+        }
+        return maxSparseColumnStatisticsSize;
+    }
+
     public static TEncryptionAlgorithm analyzeTDEAlgorithm(Map<String, String> 
properties) throws AnalysisException {
         String name;
         //if (properties == null || 
!properties.containsKey(PROPERTIES_TDE_ALGORITHM)) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 4bff83fb40f..1f3ff990004 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -3595,12 +3595,16 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
                 
ConnectContext.get().getSessionVariable().getDefaultVariantMaxSubcolumnsCount();
         boolean enableTypedPathsToSparse = ConnectContext.get() == null ? 
false :
                 
ConnectContext.get().getSessionVariable().getDefaultEnableTypedPathsToSparse();
+        int variantMaxSparseColumnStatisticsSize = ConnectContext.get() == 
null ? 0 :
+                
ConnectContext.get().getSessionVariable().getDefaultVariantMaxSparseColumnStatisticsSize();
 
         try {
             variantMaxSubcolumnsCount = PropertyAnalyzer
                                         
.analyzeVariantMaxSubcolumnsCount(properties, variantMaxSubcolumnsCount);
             enableTypedPathsToSparse = PropertyAnalyzer
                                         
.analyzeEnableTypedPathsToSparse(properties, enableTypedPathsToSparse);
+            variantMaxSparseColumnStatisticsSize = 
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(
+                                        properties, 
variantMaxSparseColumnStatisticsSize);
         } catch (org.apache.doris.common.AnalysisException e) {
             throw new NotSupportedException(e.getMessage());
         }
@@ -3608,7 +3612,8 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
         if (!properties.isEmpty()) {
             throw new NotSupportedException("only support for "
                     + 
PropertyAnalyzer.PROPERTIES_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE
-                    + " and " + 
PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT);
+                    + " and " + 
PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SUBCOLUMNS_COUNT
+                    + " and " + 
PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE);
         }
 
         if (variantMaxSubcolumnsCount == 0 && !fields.isEmpty()) {
@@ -3616,7 +3621,8 @@ public class LogicalPlanBuilder extends 
DorisParserBaseVisitor<Object> {
                     + "when variant has fields, but got " + 
variantMaxSubcolumnsCount);
         }
 
-        return new VariantType(fields, variantMaxSubcolumnsCount, 
enableTypedPathsToSparse);
+        return new VariantType(fields, variantMaxSubcolumnsCount, 
enableTypedPathsToSparse,
+                    variantMaxSparseColumnStatisticsSize);
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
index 0bc1fa127c5..6d1f87340af 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
@@ -400,7 +400,8 @@ public abstract class DataType {
                         .collect(ImmutableList.toImmutableList());
                 return new VariantType(variantFields,
                         ((org.apache.doris.catalog.VariantType) 
type).getVariantMaxSubcolumnsCount(),
-                        ((org.apache.doris.catalog.VariantType) 
type).getEnableTypedPathsToSparse());
+                        ((org.apache.doris.catalog.VariantType) 
type).getEnableTypedPathsToSparse(),
+                        ((org.apache.doris.catalog.VariantType) 
type).getVariantMaxSparseColumnStatisticsSize());
             }
             return new VariantType(0);
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
index f30a328b5db..0d7ee104e2f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
@@ -46,6 +46,8 @@ public class VariantType extends PrimitiveType {
 
     private boolean enableTypedPathsToSparse = false;
 
+    private int variantMaxSparseColumnStatisticsSize = 10000;
+
     private final List<VariantField> predefinedFields;
 
     // No predefined fields
@@ -61,24 +63,27 @@ public class VariantType extends PrimitiveType {
         this.predefinedFields = 
ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be 
null"));
     }
 
-    public VariantType(List<VariantField> fields, int 
variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse) {
+    public VariantType(List<VariantField> fields, int 
variantMaxSubcolumnsCount, boolean enableTypedPathsToSparse,
+            int variantMaxSparseColumnStatisticsSize) {
         this.predefinedFields = 
ImmutableList.copyOf(Objects.requireNonNull(fields, "fields should not be 
null"));
         this.variantMaxSubcolumnsCount = variantMaxSubcolumnsCount;
         this.enableTypedPathsToSparse = enableTypedPathsToSparse;
+        this.variantMaxSparseColumnStatisticsSize = 
variantMaxSparseColumnStatisticsSize;
     }
 
     @Override
     public DataType conversion() {
         return new 
VariantType(predefinedFields.stream().map(VariantField::conversion)
-                                            .collect(Collectors.toList()), 
variantMaxSubcolumnsCount,
-                                                                               
     enableTypedPathsToSparse);
+                                .collect(Collectors.toList()), 
variantMaxSubcolumnsCount, enableTypedPathsToSparse,
+                                    variantMaxSparseColumnStatisticsSize);
     }
 
     @Override
     public Type toCatalogDataType() {
         org.apache.doris.catalog.VariantType type = new 
org.apache.doris.catalog.VariantType(predefinedFields.stream()
                 .map(VariantField::toCatalogDataType)
-                .collect(Collectors.toCollection(ArrayList::new)), 
variantMaxSubcolumnsCount, enableTypedPathsToSparse);
+                .collect(Collectors.toCollection(ArrayList::new)), 
variantMaxSubcolumnsCount, enableTypedPathsToSparse,
+                     variantMaxSparseColumnStatisticsSize);
         return type;
     }
 
@@ -97,7 +102,8 @@ public class VariantType extends PrimitiveType {
         sb.append("<");
         if (!predefinedFields.isEmpty()) {
             
sb.append(predefinedFields.stream().map(VariantField::toSql).collect(Collectors.joining(",")));
-            if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse) {
+            if (variantMaxSubcolumnsCount == 0 && !enableTypedPathsToSparse
+                    && variantMaxSparseColumnStatisticsSize == 10000) {
                 sb.append(">");
                 return sb.toString();
             } else {
@@ -117,6 +123,12 @@ public class VariantType extends PrimitiveType {
             sb.append("\"variant_enable_typed_paths_to_sparse\" = \"")
                                     
.append(String.valueOf(enableTypedPathsToSparse)).append("\"");
         }
+        if (variantMaxSparseColumnStatisticsSize != 10000) {
+            sb.append(",");
+            sb.append("\"variant_max_sparse_column_statistics_size\" = \"")
+                                    
.append(String.valueOf(variantMaxSparseColumnStatisticsSize))
+                                    .append("\"");
+        }
         sb.append(")>");
         return sb.toString();
     }
@@ -132,6 +144,7 @@ public class VariantType extends PrimitiveType {
         VariantType other = (VariantType) o;
         return this.variantMaxSubcolumnsCount == 
other.variantMaxSubcolumnsCount
                     && this.enableTypedPathsToSparse == 
other.enableTypedPathsToSparse
+                    && this.variantMaxSparseColumnStatisticsSize == 
other.variantMaxSparseColumnStatisticsSize
                     && Objects.equals(predefinedFields, 
other.predefinedFields);
     }
 
@@ -157,4 +170,8 @@ public class VariantType extends PrimitiveType {
     public int getVariantMaxSubcolumnsCount() {
         return variantMaxSubcolumnsCount;
     }
+
+    public int getVariantMaxSparseColumnStatisticsSize() {
+        return variantMaxSparseColumnStatisticsSize;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index c625573a4ab..5b959c8b981 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -748,6 +748,9 @@ public class SessionVariable implements Serializable, 
Writable {
     public static final String PREFER_UDF_OVER_BUILTIN = 
"prefer_udf_over_builtin";
     public static final String ENABLE_ADD_INDEX_FOR_NEW_DATA = 
"enable_add_index_for_new_data";
 
+    public static final String 
DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE =
+                                                            
"default_variant_max_sparse_column_statistics_size";
+
     /**
      * If set false, user couldn't submit analyze SQL and FE won't allocate 
any related resources.
      */
@@ -2625,6 +2628,13 @@ public class SessionVariable implements Serializable, 
Writable {
     })
     public boolean enableAddIndexForNewData = false;
 
+    @VariableMgr.VarAttr(
+            name = DEFAULT_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
+            needForward = true,
+            fuzzy = true
+    )
+    public int defaultVariantMaxSparseColumnStatisticsSize = 10000;
+
     // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to 
generate some variables,
     // not the default value set in the code.
     @SuppressWarnings("checkstyle:Indentation")
@@ -4998,5 +5008,9 @@ public class SessionVariable implements Serializable, 
Writable {
     public boolean getDefaultEnableTypedPathsToSparse() {
         return defaultEnableTypedPathsToSparse;
     }
+
+    public int getDefaultVariantMaxSparseColumnStatisticsSize() {
+        return defaultVariantMaxSparseColumnStatisticsSize;
+    }
 }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
index ab7291eaf16..ddd813df376 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/PropertyAnalyzerTest.java
@@ -334,4 +334,32 @@ public class PropertyAnalyzerTest {
                     e.getMessage());
         }
     }
+
+    @Test
+    public void testAnalyzeVariantMaxSparseColumnStatisticsSize() throws 
AnalysisException {
+        Map<String, String> properties = Maps.newHashMap();
+        
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
 "-1");
+        try {
+            
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0);
+            Assertions.fail("Expected AnalysisException was not thrown");
+        } catch (AnalysisException e) {
+            Assertions.assertNotNull(e.getMessage());
+        }
+        properties.clear();
+        
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
 "50001");
+        try {
+            
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0);
+            Assertions.fail("Expected AnalysisException was not thrown");
+        } catch (AnalysisException e) {
+            Assertions.assertNotNull(e.getMessage());
+        }
+        properties.clear();
+        
properties.put(PropertyAnalyzer.PROPERTIES_VARIANT_MAX_SPARSE_COLUMN_STATISTICS_SIZE,
 "invalid");
+        try {
+            
PropertyAnalyzer.analyzeVariantMaxSparseColumnStatisticsSize(properties, 0);
+            Assertions.fail("Expected AnalysisException was not thrown");
+        } catch (AnalysisException e) {
+            Assertions.assertNotNull(e.getMessage());
+        }
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java
index b1f2039e356..3fac71bfc33 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/persist/ScalarTypeTest.java
@@ -36,5 +36,6 @@ public class ScalarTypeTest {
         Assert.assertEquals(scalarType.getPrimitiveType(), 
scalarType2.getPrimitiveType());
         Assert.assertEquals(scalarType.getVariantMaxSubcolumnsCount(), 0);
         Assert.assertEquals(scalarType.getVariantEnableTypedPathsToSparse(), 
false);
+        
Assert.assertEquals(scalarType.getVariantMaxSparseColumnStatisticsSize(), 0);
     }
 }
diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto
index 8136500491c..1e97d5ad476 100644
--- a/gensrc/proto/olap_file.proto
+++ b/gensrc/proto/olap_file.proto
@@ -344,6 +344,8 @@ message ColumnPB {
     optional bool variant_enable_typed_paths_to_sparse = 27 [default = false];
     // this field is only used during flexible partial update load
     optional bool is_on_update_current_timestamp = 28 [default = false];
+    // per-column limit for the variant sparse column statistics size (default 10000)
+    optional int32 variant_max_sparse_column_statistics_size = 29 [default = 
10000];
 }
 
 // Dictionary of Schema info, to reduce TabletSchemaCloudPB fdb kv size
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index f1ef06103ef..f14a8db707c 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -51,6 +51,7 @@ struct TColumn {
     21: optional TPatternType pattern_type
     22: optional bool variant_enable_typed_paths_to_sparse = false;
     23: optional bool is_on_update_current_timestamp = false
+    24: optional i32 variant_max_sparse_column_statistics_size = 10000
 }
 
 struct TSlotDescriptor {
diff --git 
a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy 
b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy
index 2b5b4007635..daeb83f3d20 100644
--- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy
+++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy
@@ -64,6 +64,7 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + 
// keep this line
     "ccr_mow_syncer_p0," +
     "hdfs_vault_p2," +
     "inject_hdfs_vault_p0," +
+    "variant_p0/nested," +
     "plsql_p0," + // plsql is not developped any more, add by sk.
     "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line
 
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy 
b/regression-test/pipeline/p0/conf/regression-conf.groovy
index efe158f0887..52ee29f0926 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -87,7 +87,8 @@ excludeDirectories = "000_the_start_sentinel_do_not_touch," + 
// keep this line
     "nereids_rules_p0/subquery," +
     "unique_with_mow_c_p0," +
     "workload_manager_p1," +
-    "plsql_p0," + // plsql is not developped any more
+    "plsql_p0," + // plsql is not developped any more, add by sk
+    "variant_p0/nested," +
     "zzz_the_end_sentinel_do_not_touch"// keep this line as the last line
 
 customConf1 = "test_custom_conf_value"
diff --git 
a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy
 
b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy
index 6e3d6a12ce8..67dd512e0ed 100644
--- 
a/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy
+++ 
b/regression-test/suites/fault_injection_p0/test_variant_compaction_with_sparse_limit.groovy
@@ -23,12 +23,6 @@ suite("test_compaction_variant_with_sparse_limit", 
"nonConcurrent") {
     def backendId_to_backendHttpPort = [:]
     getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
 
-    def set_be_config = { key, value ->
-    for (String backend_id: backendId_to_backendIP.keySet()) {
-        def (code, out, err) = 
update_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), key, value)
-            logger.info("update config: code=" + code + ", out=" + out + ", 
err=" + err)
-        }
-    }
     try {
         String backend_id = backendId_to_backendIP.keySet()[0]
         def (code, out, err) = 
show_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id))
@@ -45,16 +39,17 @@ suite("test_compaction_variant_with_sparse_limit", 
"nonConcurrent") {
             }
         }
 
-        set_be_config("variant_max_sparse_column_statistics_size", "2")
-        int max_subcolumns_count = Math.floor(Math.random() * 5) 
+
+        int max_subcolumns_count = Math.floor(Math.random() * 5)
+        int max_sparse_column_statistics_size = 2
         if (max_subcolumns_count == 1) {
             max_subcolumns_count = 0
         }
         def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
             sql "DROP TABLE IF EXISTS ${tableName}"
-            def var_def = "variant 
<properties(\"variant_max_subcolumns_count\" = \"${max_subcolumns_count}\")>"
+            def var_def = "variant 
<properties(\"variant_max_subcolumns_count\" = \"${max_subcolumns_count}\", 
\"variant_max_sparse_column_statistics_size\" = 
\"${max_sparse_column_statistics_size}\")>"
             if (key_type == "AGGREGATE") {
-                var_def = "variant 
<properties(\"variant_max_subcolumns_count\" = \"${max_subcolumns_count}\")> 
replace"
+                var_def = "variant 
<properties(\"variant_max_subcolumns_count\" = \"${max_subcolumns_count}\", 
\"variant_max_sparse_column_statistics_size\" = 
\"${max_sparse_column_statistics_size}\")> replace"
             }
             sql """
                 CREATE TABLE IF NOT EXISTS ${tableName} (
@@ -66,6 +61,25 @@ suite("test_compaction_variant_with_sparse_limit", 
"nonConcurrent") {
                 properties("replication_num" = "1", "disable_auto_compaction" 
= "true");
             """
         }
+        // check the sparse column must not be read if max_subcolumns_count is 0
+        def check_sparse_column_must_not_be_read = { tableName ->
+            if (max_subcolumns_count == 0) {
+                try {
+                    
GetDebugPoint().enableDebugPointForAllBEs("exist_in_sparse_column_must_be_false")
+                    sql """ select v['a'], v['b'], v['c'], v['x'], v['y'], 
v['z'], v['m'], v['l'], v['g'], v['z'], v['sala'], v['dddd'] from 
${tableName}"""
+                } finally {
+                    
GetDebugPoint().disableDebugPointForAllBEs("exist_in_sparse_column_must_be_false")
+                }
+            } else if (max_subcolumns_count > 1) {
+                // here it will always be false
+                try {
+                    
GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false")
+                    sql """ select v['mmm'] from ${tableName} where k = 30"""
+                } finally {
+                    
GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false")
+                }
+            }
+        }
         def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"]
         // def key_types = ["AGGREGATE"]
         for (int i = 0; i < key_types.size(); i++) {
@@ -119,8 +133,41 @@ suite("test_compaction_variant_with_sparse_limit", 
"nonConcurrent") {
             qt_sql_55 "select cast(v['b'] as string), cast(v['b']['c'] as 
string) from  ${tableName} where cast(v['b'] as string) != 'null' and 
cast(v['b'] as string) != '{}' order by k desc limit 10;"
         }
 
+    } catch (e) {
+        logger.info("catch exception: ${e}")
     } finally {
-        // set back to default
-        set_be_config("variant_max_sparse_column_statistics_size", "10000")
+        sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE"
+        sql "DROP TABLE IF EXISTS simple_variant_UNIQUE"
+        sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE"
+    }
+
+    // test the variant_max_sparse_column_statistics_size debug point error case
+    sql "DROP TABLE IF EXISTS tn_simple_variant_DUPLICATE"
+    sql """
+        CREATE TABLE IF NOT EXISTS tn_simple_variant_DUPLICATE (
+            k bigint,
+            v variant <properties(\"variant_max_subcolumns_count\" = \"2\", 
\"variant_max_sparse_column_statistics_size\" = \"1\")>
+        )
+        DUPLICATE KEY(`k`)
+        DISTRIBUTED BY HASH(k) BUCKETS 1
+        properties("replication_num" = "1", "disable_auto_compaction" = 
"true");
+    """
+    // here it will always be true
+    sql """insert into tn_simple_variant_DUPLICATE values (1, '{"a" : 1, "b" : 
2}');"""
+    
GetDebugPoint().enableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false")
+    test {
+        sql """ select v['a'] from tn_simple_variant_DUPLICATE where k = 1"""
+        exception null
     }
+
+    // here it will always be false
+    sql """ truncate table tn_simple_variant_DUPLICATE --force ; """
+    sql """insert into tn_simple_variant_DUPLICATE values (1, '{"d" : "ddd",  
"s" : "fff", "da": "ddd", "m": 111}');"""
+    test {
+        sql """ select v['m'] from tn_simple_variant_DUPLICATE"""
+        exception "exceeded_sparse_column_limit_must_be_false"
+    }
+
+    
GetDebugPoint().disableDebugPointForAllBEs("exceeded_sparse_column_limit_must_be_false")
+    
 }
diff --git 
a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
 
b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
index 0ab363d5671..d47c486047e 100644
--- 
a/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
+++ 
b/regression-test/suites/variant_p0/predefine/test_variant_compaction_with_sparse_limit.groovy
@@ -23,12 +23,6 @@ suite("test_compaction_variant_predefine_with_sparse_limit", 
"nonConcurrent") {
     def backendId_to_backendHttpPort = [:]
     getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
 
-    def set_be_config = { key, value ->
-    for (String backend_id: backendId_to_backendIP.keySet()) {
-        def (code, out, err) = 
update_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id), key, value)
-            logger.info("update config: code=" + code + ", out=" + out + ", 
err=" + err)
-        }
-    }
     try {
         String backend_id = backendId_to_backendIP.keySet()[0]
         def (code, out, err) = 
show_be_config(backendId_to_backendIP.get(backend_id), 
backendId_to_backendHttpPort.get(backend_id))
@@ -45,13 +39,14 @@ 
suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") {
             }
         }
 
-        set_be_config("variant_max_sparse_column_statistics_size", "2")
+        int max_sparse_column_statistics_size = 2
         def create_table = { tableName, buckets="auto", key_type="DUPLICATE" ->
             sql "DROP TABLE IF EXISTS ${tableName}"
-            def var_def = "variant <'sala' : int, 'ddd' : double, 'z' : 
double>"
+            def var_def = "variant <MATCH_NAME 'sala' : int, MATCH_NAME 'ddd' 
: double, MATCH_NAME 'z' : double, 
properties(\"variant_max_sparse_column_statistics_size\" = 
\"${max_sparse_column_statistics_size}\")>"
             if (key_type == "AGGREGATE") {
-                var_def = "variant <'sala' : int, 'ddd' : double, 'z' : 
double> replace"
+                var_def = "variant <MATCH_NAME 'sala' : int, MATCH_NAME 'ddd' 
: double, MATCH_NAME 'z' : double, 
properties(\"variant_max_sparse_column_statistics_size\" = 
\"${max_sparse_column_statistics_size}\")> replace"
             }
+
             sql """
                 CREATE TABLE IF NOT EXISTS ${tableName} (
                     k bigint,
@@ -61,6 +56,9 @@ suite("test_compaction_variant_predefine_with_sparse_limit", 
"nonConcurrent") {
                 DISTRIBUTED BY HASH(k) BUCKETS ${buckets}
                 properties("replication_num" = "1", "disable_auto_compaction" 
= "true");
             """
+            def create_tbl_res = sql """ show create table ${tableName} """
+            logger.info("${create_tbl_res}")
+            
assertTrue(create_tbl_res.toString().contains("variant_max_sparse_column_statistics_size"))
         }
         def key_types = ["DUPLICATE", "UNIQUE", "AGGREGATE"]
         // def key_types = ["AGGREGATE"]
@@ -132,7 +130,8 @@ 
suite("test_compaction_variant_predefine_with_sparse_limit", "nonConcurrent") {
             order_qt_select "select * from ${tableName} order by k, cast(v as 
string) limit 5;"
         }
     } finally {
-        // set back to default
-        set_be_config("variant_max_sparse_column_statistics_size", "10000")
+        sql "DROP TABLE IF EXISTS simple_variant_DUPLICATE"
+        sql "DROP TABLE IF EXISTS simple_variant_UNIQUE"
+        sql "DROP TABLE IF EXISTS simple_variant_AGGREGATE"
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to