(doris) branch master updated: [fix](variant) fix the reading core caused by inserting nested column and scalar column in variant sub-column (#53083)

eldenmoon Wed, 23 Jul 2025 23:31:22 -0700

This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 958e9eefe49 [fix](variant) fix the reading core caused by inserting 
nested column and scalar column in variant sub-column (#53083)
958e9eefe49 is described below

commit 958e9eefe49ebe613d8c9b930b3e0f6e8f887e9c
Author: amory <[email protected]>
AuthorDate: Thu Jul 24 14:31:09 2025 +0800

    [fix](variant) fix the reading core caused by inserting nested column and 
scalar column in variant sub-column (#53083)
    
    this pr main fix the problem which if we create table with
    ```variant_enable_flatten_nested```
    then insert variant data:
    ```'{"nested":{"a":"1"}}``` and 
```'{"nested":[{"a":1,"c":1.1},{"b":"1"}]}'```
    we will meet core for reading
    so we should forbid this table property
    and for old data we insert different structure data will meet some error 
like this:
    ```
    mysql> insert into vs values (2, '{"nested":{"a":"1"}}');
    Query OK, 1 row affected (0.22 sec)
    {'label':'label_165a8209698c4391_988c6532615017c4', 'status':'VISIBLE',
    'txnId':'1011'}
    
    mysql> insert into vs values (1,
    '{"nested":[{"a":1,"c":1.1},{"b":"1"}]}');
    ERROR 1105 (HY000): errCode = 2, detailMessage =
    (10.16.10.6)[INTERNAL_ERROR]tablet 1752145213719 failed on majority
    backends: [DATA_QUALITY_ERROR]PStatus:
    (10.16.10.6)[DATA_QUALITY_ERROR]Ambiguous paths: v.nested.a vs
    v.nested.a with different nested part true vs false
    ```
---
 be/src/cloud/cloud_meta_mgr.cpp                    |  13 +-
 be/src/cloud/schema_cloud_dictionary_cache.cpp     |  36 ++-
 be/src/olap/base_tablet.cpp                        |   4 +-
 be/src/olap/rowset/beta_rowset_writer.cpp          |   7 +-
 be/src/olap/rowset/beta_rowset_writer.h            |   2 +-
 .../rowset/segment_v2/hierarchical_data_reader.h   |   6 +-
 be/src/service/internal_service.cpp                |  18 +-
 be/src/vec/common/schema_util.cpp                  |  99 ++++++--
 be/src/vec/common/schema_util.h                    |  17 +-
 .../cloud/test_schema_cloud_dictionary_cache.cpp   |  54 +++++
 be/test/common/schema_util_test.cpp                | 269 +++++++++++++++++++++
 .../apache/doris/datasource/InternalCatalog.java   |   6 +
 .../java/org/apache/doris/qe/SessionVariable.java  |  15 ++
 gensrc/thrift/AgentService.thrift                  |   2 +-
 regression-test/data/variant_p0/nested/load.out    | Bin 0 -> 7411 bytes
 regression-test/data/variant_p0/nested/sql/q01.out | Bin 0 -> 377 bytes
 .../suites/variant_p0/delete_update.groovy         |   1 +
 regression-test/suites/variant_p0/nested.groovy    |   2 +-
 .../suites/variant_p0/nested/load.groovy           | 198 +++++++++++++++
 .../suites/variant_p0/nested/sql/q01.sql           |  13 +
 regression-test/suites/variant_p0/nested2.groovy   |   1 +
 .../test_double_write_when_schema_change.groovy    |   1 +
 22 files changed, 722 insertions(+), 42 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index 9397a426bc5..024c16c3533 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1049,11 +1049,18 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& 
rs_meta, const std::string&
     // Replace schema dictionary keys based on the rowset's index ID to 
maintain schema consistency.
     CloudStorageEngine& engine = 
ExecEnv::GetInstance()->storage_engine().to_cloud();
     // if not enable dict cache, then directly return true to avoid refresh
-    bool replaced =
+    Status replaced_st =
             config::variant_use_cloud_schema_dict_cache
                     ? 
engine.get_schema_cloud_dictionary_cache().replace_schema_to_dict_keys(
                               rs_meta_pb.index_id(), req.mutable_rowset_meta())
-                    : true;
+                    : Status::OK();
+    // if the replaced_st is not ok and alse not NotFound, then we need to 
just return the replaced_st
+    VLOG_DEBUG << "replace schema to dict keys, replaced_st: " << 
replaced_st.to_string()
+               << ", replaced_st.is<ErrorCode::NOT_FOUND>(): "
+               << replaced_st.is<ErrorCode::NOT_FOUND>();
+    if (!replaced_st.ok() && !replaced_st.is<ErrorCode::NOT_FOUND>()) {
+        return replaced_st;
+    }
     Status st = retry_rpc("commit rowset", req, &resp, 
&MetaService_Stub::commit_rowset);
     if (!st.ok() && resp.status().code() == MetaServiceCode::ALREADY_EXISTED) {
         if (existed_rs_meta != nullptr && resp.has_existed_rowset_meta()) {
@@ -1067,7 +1074,7 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& 
rs_meta, const std::string&
     // If dictionary replacement fails, it may indicate that the local schema 
dictionary is outdated.
     // Refreshing the dictionary here ensures that the rowset metadata is 
updated with the latest schema definitions,
     // which is critical for maintaining consistency between the rowset and 
its corresponding schema.
-    if (!replaced) {
+    if (replaced_st.is<ErrorCode::NOT_FOUND>()) {
         RETURN_IF_ERROR(
                 
engine.get_schema_cloud_dictionary_cache().refresh_dict(rs_meta_pb.index_id()));
     }
diff --git a/be/src/cloud/schema_cloud_dictionary_cache.cpp 
b/be/src/cloud/schema_cloud_dictionary_cache.cpp
index 25f0b232702..9fdde420ecb 100644
--- a/be/src/cloud/schema_cloud_dictionary_cache.cpp
+++ b/be/src/cloud/schema_cloud_dictionary_cache.cpp
@@ -19,6 +19,7 @@
 
 #include <fmt/core.h>
 #include <gen_cpp/olap_file.pb.h>
+#include <vec/common/schema_util.h>
 
 #include <functional>
 #include <memory>
@@ -62,6 +63,27 @@ SchemaCloudDictionarySPtr 
SchemaCloudDictionaryCache::_lookup(int64_t index_id)
     return dict;
 }
 
+Status check_path_amibigus(const SchemaCloudDictionary& schema, 
RowsetMetaCloudPB* rowset_meta) {
+    // if enable_variant_flatten_nested is false, then we don't need to check 
path amibigus
+    if (!rowset_meta->tablet_schema().enable_variant_flatten_nested()) {
+        return Status::OK();
+    }
+    // try to get all the paths in the rowset meta
+    vectorized::PathsInData all_paths;
+    for (const auto& column : rowset_meta->tablet_schema().column()) {
+        vectorized::PathInData path_in_data;
+        path_in_data.from_protobuf(column.column_path_info());
+        all_paths.push_back(path_in_data);
+    }
+    // try to get all the paths in the schema dict
+    for (const auto& [_, column] : schema.column_dict()) {
+        vectorized::PathInData path_in_data;
+        path_in_data.from_protobuf(column.column_path_info());
+        all_paths.push_back(path_in_data);
+    }
+    
RETURN_IF_ERROR(vectorized::schema_util::check_variant_has_no_ambiguous_paths(all_paths));
+    return Status::OK();
+}
 /**
  * Processes dictionary entries by matching items from the given item map.
  * It maps items to their dictionary keys, then adds these keys to the rowset 
metadata.
@@ -101,7 +123,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
         return output;
     };
 
-    google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+    google::protobuf::RepeatedPtrField<ItemPB> none_extracted_items;
     std::unordered_map<std::string, int> reversed_dict;
     for (const auto& [key, val] : item_dict) {
         reversed_dict[serialize_fn(val)] = key;
@@ -110,7 +132,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
     for (const auto& item : items) {
         if (filter(item)) {
             // Filter none extended items, mainly extended columns and 
extended indexes
-            *none_ext_items.Add() = item;
+            *none_extracted_items.Add() = item;
             continue;
         }
         const std::string serialized_key = serialize_fn(item);
@@ -127,7 +149,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
     }
     // clear extended items to prevent writing them to fdb
     if (result != nullptr) {
-        result->Swap(&none_ext_items);
+        result->Swap(&none_extracted_items);
     }
     return Status::OK();
 }
@@ -137,11 +159,15 @@ Status 
SchemaCloudDictionaryCache::replace_schema_to_dict_keys(int64_t index_id,
     if (!rowset_meta->has_variant_type_in_schema()) {
         return Status::OK();
     }
+    // first attempt to get dict from cache
     auto dict = _lookup(index_id);
     if (!dict) {
-        g_schema_dict_cache_miss_count << 1;
-        return Status::NotFound<false>("Not found dict {}", index_id);
+        // if not found the dict in cache, then refresh the dict from remote 
meta service
+        RETURN_IF_ERROR(refresh_dict(index_id, &dict));
     }
+    // here we should have the dict
+    DCHECK(dict);
+    RETURN_IF_ERROR(check_path_amibigus(*dict, rowset_meta));
     auto* dict_list = rowset_meta->mutable_schema_dict_key_list();
     // Process column dictionary: add keys for non-extended columns.
     auto column_filter = [&](const doris::ColumnPB& col) -> bool { return 
col.unique_id() >= 0; };
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index 6f1600c39ae..1ea9d8d860d 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -194,11 +194,13 @@ Status BaseTablet::update_by_least_common_schema(const 
TabletSchemaSPtr& update_
     CHECK(_max_version_schema->schema_version() >= 
update_schema->schema_version());
     TabletSchemaSPtr final_schema;
     bool check_column_size = true;
+    VLOG_DEBUG << "dump _max_version_schema: " << 
_max_version_schema->dump_full_schema();
+    VLOG_DEBUG << "dump update_schema: " << update_schema->dump_full_schema();
     RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(
             {_max_version_schema, update_schema}, _max_version_schema, 
final_schema,
             check_column_size));
     _max_version_schema = final_schema;
-    VLOG_DEBUG << "dump updated tablet schema: " << 
final_schema->dump_structure();
+    VLOG_DEBUG << "dump updated tablet schema: " << 
final_schema->dump_full_schema();
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 56f1d16eecb..35139fa4ba9 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -874,13 +874,13 @@ int64_t BetaRowsetWriter::_num_seg() const {
 // Eg. rowset schema:       A(int),    B(float),  C(int), D(int)
 // _tabelt->tablet_schema:  A(bigint), B(double)
 //  => update_schema:       A(bigint), B(double), C(int), D(int)
-void BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) 
{
+Status BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr 
flush_schema) {
     std::lock_guard<std::mutex> lock(*(_context.schema_lock));
     TabletSchemaSPtr update_schema;
     if (_context.merged_tablet_schema == nullptr) {
         _context.merged_tablet_schema = _context.tablet_schema;
     }
-    static_cast<void>(vectorized::schema_util::get_least_common_schema(
+    RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(
             {_context.merged_tablet_schema, flush_schema}, nullptr, 
update_schema));
     CHECK_GE(update_schema->num_columns(), flush_schema->num_columns())
             << "Rowset merge schema columns count is " << 
update_schema->num_columns()
@@ -889,6 +889,7 @@ void 
BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) {
             << " flush_schema: " << flush_schema->dump_structure();
     _context.merged_tablet_schema.swap(update_schema);
     VLOG_DEBUG << "dump rs schema: " << 
_context.tablet_schema->dump_structure();
+    return Status::OK();
 }
 
 Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool 
check_segment_num) {
@@ -1106,7 +1107,7 @@ Status BaseBetaRowsetWriter::add_segment(uint32_t 
segment_id, const SegmentStati
     }
     // tablet schema updated
     if (flush_schema != nullptr) {
-        update_rowset_schema(flush_schema);
+        RETURN_IF_ERROR(update_rowset_schema(flush_schema));
     }
     if (_context.mow_context != nullptr) {
         // ensure that the segment file writing is complete
diff --git a/be/src/olap/rowset/beta_rowset_writer.h 
b/be/src/olap/rowset/beta_rowset_writer.h
index ce89cad7a99..9cb72896f74 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -202,7 +202,7 @@ public:
     }
 
 private:
-    void update_rowset_schema(TabletSchemaSPtr flush_schema);
+    Status update_rowset_schema(TabletSchemaSPtr flush_schema);
     // build a tmp rowset for load segment to calc delete_bitmap
     // for this segment
 protected:
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index d74ba8d4f03..c7ab8499c64 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -143,7 +143,11 @@ private:
             PathInData relative_path = 
node.path.copy_pop_nfront(_path.get_parts().size());
 
             if (node.path.has_nested_part()) {
-                CHECK_EQ(node.data.type->get_primitive_type(), 
PrimitiveType::TYPE_ARRAY);
+                if (node.data.type->get_primitive_type() != 
PrimitiveType::TYPE_ARRAY) {
+                    return Status::InternalError(
+                            "Meet none array column when flatten nested array, 
path {}, type {}",
+                            node.path.get_path(), node.data.type->get_name());
+                }
                 PathInData parent_path = 
node.path.get_nested_prefix_path().copy_pop_nfront(
                         _path.get_parts().size());
                 nested_subcolumns[parent_path].emplace_back(relative_path, 
column->get_ptr(),
diff --git a/be/src/service/internal_service.cpp 
b/be/src/service/internal_service.cpp
index 2127e90e351..2c6bb59a53d 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -1205,8 +1205,13 @@ void 
PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle
             if (!schemas.empty() && st.ok()) {
                 // merge all
                 TabletSchemaSPtr merged_schema;
-                
static_cast<void>(vectorized::schema_util::get_least_common_schema(schemas, 
nullptr,
-                                                                               
    merged_schema));
+                st = vectorized::schema_util::get_least_common_schema(schemas, 
nullptr,
+                                                                      
merged_schema);
+                if (!st.ok()) {
+                    LOG(WARNING) << "Failed to get least common schema: " << 
st.to_string();
+                    st = Status::InternalError("Failed to get least common 
schema: {}",
+                                               st.to_string());
+                }
                 VLOG_DEBUG << "dump schema:" << 
merged_schema->dump_structure();
                 merged_schema->reserve_extracted_columns();
                 merged_schema->to_schema_pb(response->mutable_merged_schema());
@@ -1242,8 +1247,13 @@ void 
PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle
                 if (!tablet_schemas.empty()) {
                     // merge all
                     TabletSchemaSPtr merged_schema;
-                    
static_cast<void>(vectorized::schema_util::get_least_common_schema(
-                            tablet_schemas, nullptr, merged_schema));
+                    st = 
vectorized::schema_util::get_least_common_schema(tablet_schemas, nullptr,
+                                                                          
merged_schema);
+                    if (!st.ok()) {
+                        LOG(WARNING) << "Failed to get least common schema: " 
<< st.to_string();
+                        st = Status::InternalError("Failed to get least common 
schema: {}",
+                                                   st.to_string());
+                    }
                     
merged_schema->to_schema_pb(response->mutable_merged_schema());
                     VLOG_DEBUG << "dump schema:" << 
merged_schema->dump_structure();
                 }
diff --git a/be/src/vec/common/schema_util.cpp 
b/be/src/vec/common/schema_util.cpp
index 51be756cf94..044f6527e72 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -238,10 +238,67 @@ TabletColumn get_column_by_type(const 
vectorized::DataTypePtr& data_type, const
     return result;
 }
 
-void update_least_schema_internal(const std::map<PathInData, DataTypes>& 
subcolumns_types,
-                                  TabletSchemaSPtr& common_schema, bool 
update_sparse_column,
-                                  int32_t variant_col_unique_id,
-                                  std::set<PathInData>* path_set = nullptr) {
+// check if two paths which same prefix have different structure
+static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
+                                                 const PathInData::Parts& rhs) 
{
+    if (lhs.size() != rhs.size()) {
+        return false; // different size means different structure
+    }
+    // Since we group by path string, lhs and rhs must have the same size and 
keys
+    // We only need to check if they have different nested structure
+    for (size_t i = 0; i < lhs.size(); ++i) {
+        if (lhs[i] != rhs[i]) {
+            VLOG_DEBUG << fmt::format(
+                    "Check different structure: {} vs {}, lhs[i].is_nested: 
{}, rhs[i].is_nested: "
+                    "{}",
+                    lhs[i].key, rhs[i].key, lhs[i].is_nested, 
rhs[i].is_nested);
+            return true;
+        }
+    }
+    return false;
+}
+
+Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
+    // Group paths by their string representation to reduce comparisons
+    std::unordered_map<std::string, std::vector<size_t>> path_groups;
+
+    for (size_t i = 0; i < tuple_paths.size(); ++i) {
+        // same path should have same structure, so we group them by path
+        path_groups[tuple_paths[i].get_path()].push_back(i);
+        // print part of tuple_paths[i]
+        VLOG_DEBUG << "tuple_paths[i]: " << tuple_paths[i].get_path();
+        for (const auto& part : tuple_paths[i].get_parts()) {
+            VLOG_DEBUG << "part: " << part.key << ", is_nested: " << 
part.is_nested;
+        }
+    }
+
+    // Only compare paths within the same group
+    for (const auto& [path_str, indices] : path_groups) {
+        if (indices.size() <= 1) {
+            continue; // No conflicts possible
+        }
+
+        // Compare all pairs within this group
+        for (size_t i = 0; i < indices.size(); ++i) {
+            for (size_t j = 0; j < i; ++j) {
+                if 
(has_different_structure_in_same_path(tuple_paths[indices[i]].get_parts(),
+                                                         
tuple_paths[indices[j]].get_parts())) {
+                    return Status::DataQualityError(
+                            "Ambiguous paths: {} vs {} with different nested 
part {} vs {}",
+                            tuple_paths[indices[i]].get_path(), 
tuple_paths[indices[j]].get_path(),
+                            tuple_paths[indices[i]].has_nested_part(),
+                            tuple_paths[indices[j]].has_nested_part());
+                }
+            }
+        }
+    }
+    return Status::OK();
+}
+
+Status update_least_schema_internal(const std::map<PathInData, DataTypes>& 
subcolumns_types,
+                                    TabletSchemaSPtr& common_schema, bool 
update_sparse_column,
+                                    int32_t variant_col_unique_id,
+                                    std::set<PathInData>* path_set = nullptr) {
     PathsInData tuple_paths;
     DataTypes tuple_types;
     CHECK(common_schema.use_count() == 1);
@@ -292,13 +349,18 @@ void update_least_schema_internal(const 
std::map<PathInData, DataTypes>& subcolu
             path_set->insert(tuple_paths[i]);
         }
     }
+    return Status::OK();
 }
 
-void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
-                                TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
-                                std::set<PathInData>* path_set) {
+Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
+                                  TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
+                                  std::set<PathInData>* path_set) {
     // Types of subcolumns by path from all tuples.
     std::map<PathInData, DataTypes> subcolumns_types;
+
+    // Collect all paths first to enable batch checking
+    std::vector<PathInData> all_paths;
+
     for (const TabletSchemaSPtr& schema : schemas) {
         for (const TabletColumnPtr& col : schema->columns()) {
             // Get subcolumns of this variant
@@ -306,9 +368,14 @@ void update_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
                 col->parent_unique_id() == variant_col_unique_id) {
                 subcolumns_types[*col->path_info_ptr()].push_back(
                         DataTypeFactory::instance().create_data_type(*col, 
col->is_nullable()));
+                all_paths.push_back(*col->path_info_ptr());
             }
         }
     }
+
+    // Batch check for conflicts
+    RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));
+
     for (const TabletSchemaSPtr& schema : schemas) {
         if (schema->field_index(variant_col_unique_id) == -1) {
             // maybe dropped
@@ -326,13 +393,13 @@ void update_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
             }
         }
     }
-    update_least_schema_internal(subcolumns_types, common_schema, false, 
variant_col_unique_id,
-                                 path_set);
+    return update_least_schema_internal(subcolumns_types, common_schema, false,
+                                        variant_col_unique_id, path_set);
 }
 
-void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
-                                TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
-                                const std::set<PathInData>& path_set) {
+Status update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
+                                  TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
+                                  const std::set<PathInData>& path_set) {
     // Types of subcolumns by path from all tuples.
     std::map<PathInData, DataTypes> subcolumns_types;
     for (const TabletSchemaSPtr& schema : schemas) {
@@ -351,7 +418,8 @@ void update_least_sparse_column(const 
std::vector<TabletSchemaSPtr>& schemas,
             }
         }
     }
-    update_least_schema_internal(subcolumns_types, common_schema, true, 
variant_col_unique_id);
+    return update_least_schema_internal(subcolumns_types, common_schema, true,
+                                        variant_col_unique_id);
 }
 
 void inherit_column_attributes(const TabletColumn& source, TabletColumn& 
target,
@@ -406,7 +474,6 @@ Status get_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
                                const TabletSchemaSPtr& base_schema, 
TabletSchemaSPtr& output_schema,
                                bool check_schema_size) {
     std::vector<int32_t> variant_column_unique_id;
-
     // Construct a schema excluding the extracted columns and gather unique 
identifiers for variants.
     // Ensure that the output schema also excludes these extracted columns. 
This approach prevents
     // duplicated paths following the update_least_common_schema process.
@@ -453,9 +520,9 @@ Status get_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
         std::set<PathInData> path_set;
         // 1. cast extracted column to common type
         // path set is used to record the paths of those sparse columns that 
have been merged into the extracted columns, eg: v:b
-        update_least_common_schema(schemas, output_schema, unique_id, 
&path_set);
+        RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema, 
unique_id, &path_set));
         // 2. cast sparse column to common type, exclude the columns from the 
path set
-        update_least_sparse_column(schemas, output_schema, unique_id, 
path_set);
+        RETURN_IF_ERROR(update_least_sparse_column(schemas, output_schema, 
unique_id, path_set));
     }
 
     inherit_column_attributes(output_schema);
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 08ca16c6900..565400b2d38 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -87,6 +87,11 @@ Status parse_variant_columns(Block& block, const 
std::vector<int>& variant_pos,
                              const ParseConfig& config);
 Status encode_variant_sparse_subcolumns(ColumnVariant& column);
 
+// check if the tuple_paths has ambiguous paths
+// situation:
+// throw exception if there exists a prefix with matched names, but not 
matched structure (is Nested, number of dimensions).
+Status check_variant_has_no_ambiguous_paths(const std::vector<PathInData>& 
paths);
+
 // Pick the tablet schema with the highest schema version as the reference.
 // Then update all variant columns to there least common types.
 // Return the final merged schema as common schema.
@@ -97,13 +102,13 @@ Status get_least_common_schema(const 
std::vector<TabletSchemaSPtr>& schemas,
 
 // Get least common types for extracted columns which has Path info,
 // with a speicified variant column's unique id
-void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
-                                TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
-                                std::unordered_set<PathInData, 
PathInData::Hash>* path_set);
+Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
+                                  TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
+                                  std::unordered_set<PathInData, 
PathInData::Hash>* path_set);
 
-void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
-                                TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
-                                const std::unordered_set<PathInData, 
PathInData::Hash>& path_set);
+Status update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
+                                  TabletSchemaSPtr& common_schema, int32_t 
variant_col_unique_id,
+                                  const std::unordered_set<PathInData, 
PathInData::Hash>& path_set);
 
 // inherit attributes like index/agg info from it's parent column
 void inherit_column_attributes(TabletSchemaSPtr& schema);
diff --git a/be/test/cloud/test_schema_cloud_dictionary_cache.cpp 
b/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
index 3d05eb67e45..0fc4fd0c3f5 100644
--- a/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
+++ b/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
@@ -15,9 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "cloud/schema_cloud_dictionary_cache.cpp"
 #include "cloud/schema_cloud_dictionary_cache.h"
 #include "gen_cpp/olap_file.pb.h"
 #include "gtest/gtest.h"
+#include "vec/json/path_in_data.h"
 
 namespace doris {
 
@@ -175,4 +177,56 @@ TEST(SchemaCloudDictionaryCacheTest, 
ReplaceDictKeysToSchema_RefreshFailure) {
     EXPECT_FALSE(st.ok());
 }
 
+// Test case 5: replace_schema_to_dict_keys with 
tablet_schema.enable_variant_flatten_nested = true
+TEST(SchemaCloudDictionaryCacheTest, 
ProcessDictionary_VariantPathConflict_Throws) {
+    SchemaCloudDictionarySPtr dict = std::make_shared<SchemaCloudDictionary>();
+    // construct two variant columns with same unique_id but different 
path_info
+    auto& col_dict = *dict->mutable_column_dict();
+    ColumnPB* col1 = &(col_dict)[101];
+    col1->set_unique_id(101);
+    vectorized::PathInDataBuilder builder1;
+    builder1.append("v", false).append("nested", true).append("a", false);
+    vectorized::PathInData path_in_data1 = builder1.build();
+    segment_v2::ColumnPathInfo path_info1;
+    path_in_data1.to_protobuf(&path_info1, 0);
+    col1->mutable_column_path_info()->CopyFrom(path_info1);
+    {
+        RowsetMetaCloudPB rs_meta;
+        rs_meta.set_has_variant_type_in_schema(true);
+        auto* schema = rs_meta.mutable_tablet_schema();
+        schema->set_enable_variant_flatten_nested(true);
+        // add two columns with same key but different is_nested value
+        auto* col_schema1 = schema->add_column();
+        col_schema1->set_unique_id(101);
+        // create pathIndata with same key but different is_nested value
+        vectorized::PathInDataBuilder builder3;
+        builder3.append("v", false).append("nested", false).append("a", false);
+        vectorized::PathInData path_in_data3 = builder3.build();
+        segment_v2::ColumnPathInfo path_info3;
+        path_in_data3.to_protobuf(&path_info3, 0);
+        col_schema1->mutable_column_path_info()->CopyFrom(path_info3);
+        auto st = check_path_amibigus(*dict, &rs_meta);
+        EXPECT_FALSE(st.ok());
+        EXPECT_EQ(st.code(), TStatusCode::DATA_QUALITY_ERROR);
+    }
+
+    {
+        RowsetMetaCloudPB rs_meta;
+        rs_meta.set_has_variant_type_in_schema(true);
+        auto* schema = rs_meta.mutable_tablet_schema();
+        // add two columns with same key but same is_nested value
+        auto* col_schema3 = schema->add_column();
+        col_schema3->set_unique_id(101);
+        vectorized::PathInDataBuilder builder5;
+        builder5.append("v", false).append("nested", true).append("a", false);
+        vectorized::PathInData path_in_data5 = builder5.build();
+        segment_v2::ColumnPathInfo path_info5;
+        path_in_data5.to_protobuf(&path_info5, 0);
+        col_schema3->mutable_column_path_info()->CopyFrom(path_info5);
+        // assert no exception
+        auto st = check_path_amibigus(*dict, &rs_meta);
+        EXPECT_TRUE(st.ok()) << st.to_string();
+    }
+}
+
 } // namespace doris
\ No newline at end of file
diff --git a/be/test/common/schema_util_test.cpp 
b/be/test/common/schema_util_test.cpp
index fb8b23c10cb..743db751dc1 100644
--- a/be/test/common/schema_util_test.cpp
+++ b/be/test/common/schema_util_test.cpp
@@ -118,4 +118,273 @@ TEST_F(SchemaUtilTest, inherit_column_attributes) {
     }
 }
 
+// Test has_different_structure_in_same_path function indirectly through 
check_variant_has_no_ambiguous_paths
+TEST_F(SchemaUtilTest, has_different_structure_in_same_path_indirect) {
+    // Test case 1: Same structure and same length - should not detect 
ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", false).append("c", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false).append("c", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 2: Different keys at same position - should not detect 
ambiguity (different keys)
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", false).append("c", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("d", false).append("c", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 3: Same keys but different nested structure - should detect 
ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", true);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_FALSE(status.ok());
+        EXPECT_TRUE(status.to_string().find("Ambiguous paths") != 
std::string::npos);
+    }
+
+    // Test case 4: Same keys but different anonymous array levels - should 
detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", true).append("b", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_FALSE(status.ok());
+        EXPECT_TRUE(status.to_string().find("Ambiguous paths") != 
std::string::npos);
+    }
+
+    // Test case 5: Same keys but different nested and anonymous levels - 
should detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", true).append("b", true);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_FALSE(status.ok());
+        EXPECT_TRUE(status.to_string().find("Ambiguous paths") != 
std::string::npos);
+    }
+
+    // Test case 6: Different lengths - should not detect ambiguity (new 
behavior: only check same length paths)
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", false).append("c", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 7: Different lengths with structure difference - should not 
detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", true).append("c", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a", false).append("b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 8: Complex nested structure difference with same length - 
should detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("user", false).append("address", 
true).append("street", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("user", false).append("address", 
false).append("street", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_FALSE(status.ok());
+        EXPECT_TRUE(status.to_string().find("Ambiguous paths") != 
std::string::npos);
+    }
+
+    // Test case 9: Multiple paths with different lengths - should not detect 
ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("config", false).append("database", 
false).append("host", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("config", false).append("database", false);
+        paths.emplace_back(builder2.build());
+
+        vectorized::PathInDataBuilder builder3;
+        builder3.append("config", false);
+        paths.emplace_back(builder3.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 10: Empty paths - should not detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 11: Single path - should not detect ambiguity
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("single", false).append("path", false);
+        paths.emplace_back(builder1.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 12: we have path like '{"a.b": "UPPER CASE", "a.c": "lower 
case", "a" : {"b" : 123}, "a" : {"c" : 456}}'
+    {
+        vectorized::PathsInData paths;
+        vectorized::PathInDataBuilder builder1;
+        builder1.append("a", false).append("b", false);
+        paths.emplace_back(builder1.build());
+
+        vectorized::PathInDataBuilder builder2;
+        builder2.append("a.b", false);
+        paths.emplace_back(builder2.build());
+
+        auto status = 
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+}
+
+// Test check_path_conflicts_with_existing function indirectly through 
update_least_common_schema
+TEST_F(SchemaUtilTest, check_path_conflicts_with_existing) {
+    // Test case 1: No conflicts - should succeed
+    {
+        TabletSchemaPB schema_pb;
+        schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+        // Create a variant column
+        construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, 
"v1_index", 1,
+                         "VARIANT", "v1", IndexType::INVERTED);
+
+        TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+        tablet_schema->init_from_pb(schema_pb);
+        std::vector<TabletColumn> subcolumns;
+
+        // Add subcolumns with different paths
+        construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_STRING, 
1, "v1.name",
+                            &subcolumns);
+        construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_INT, 1, 
"v1.age",
+                            &subcolumns);
+
+        std::vector<TabletSchemaSPtr> schemas = {tablet_schema};
+        TabletSchemaSPtr output_schema;
+
+        auto status = 
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+                                                                       
output_schema, false);
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 2: Conflicts with same path but different structure - should 
fail
+    {
+        TabletSchemaPB schema_pb;
+        schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+        // Create a variant column
+        construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, 
"v1_index", 1,
+                         "VARIANT", "v1", IndexType::INVERTED);
+
+        TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+        tablet_schema->init_from_pb(schema_pb);
+
+        // Add subcolumns with same path but different structure
+        // This would require creating paths with different nested structure
+        // For now, we'll test the basic functionality
+
+        std::vector<TabletSchemaSPtr> schemas = {tablet_schema};
+        TabletSchemaSPtr output_schema;
+
+        auto status = 
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+                                                                       
output_schema, false);
+        // This should succeed since we don't have conflicting paths in this 
simple case
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+
+    // Test case 3: Multiple schemas with conflicting paths - should fail
+    {
+        // Create first schema
+        TabletSchemaPB schema_pb1;
+        schema_pb1.set_keys_type(KeysType::DUP_KEYS);
+        construct_column(schema_pb1.add_column(), schema_pb1.add_index(), 
10001, "v1_index", 1,
+                         "VARIANT", "v1", IndexType::INVERTED);
+
+        TabletSchemaSPtr tablet_schema1 = std::make_shared<TabletSchema>();
+        tablet_schema1->init_from_pb(schema_pb1);
+        std::vector<TabletColumn> subcolumns;
+        construct_subcolumn(tablet_schema1, FieldType::OLAP_FIELD_TYPE_STRING, 
1, "v1.address",
+                            &subcolumns);
+
+        // Create second schema with same path but different structure
+        TabletSchemaPB schema_pb2;
+        schema_pb2.set_keys_type(KeysType::DUP_KEYS);
+        construct_column(schema_pb2.add_column(), schema_pb2.add_index(), 
10001, "v1_index", 1,
+                         "VARIANT", "v1", IndexType::INVERTED);
+
+        TabletSchemaSPtr tablet_schema2 = std::make_shared<TabletSchema>();
+        tablet_schema2->init_from_pb(schema_pb2);
+        std::vector<TabletColumn> subcolumns2;
+        construct_subcolumn(tablet_schema2, FieldType::OLAP_FIELD_TYPE_INT, 1, 
"v1.address",
+                            &subcolumns2);
+
+        std::vector<TabletSchemaSPtr> schemas = {tablet_schema1, 
tablet_schema2};
+        TabletSchemaSPtr output_schema;
+
+        auto status = 
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+                                                                       
output_schema, false);
+        // This should succeed since the paths are the same and we're just 
checking for structure conflicts
+        EXPECT_TRUE(status.ok()) << status.to_string();
+    }
+}
+
 } // namespace doris
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index a12c74b04ec..38d41da0480 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -2453,6 +2453,12 @@ public class InternalCatalog implements 
CatalogIf<Database> {
         boolean variantEnableFlattenNested  = false;
         try {
             variantEnableFlattenNested = 
PropertyAnalyzer.analyzeVariantFlattenNested(properties);
+            // session variable: disable_variant_flatten_nested = true
+            // with table property: variant_enable_flatten_nested = true we 
should throw error
+            if (ctx.getSessionVariable().getDisableVariantFlattenNested() && 
variantEnableFlattenNested) {
+                throw new DdlException("If you want to enable variant flatten 
nested, "
+                        + "please set session variable: 
disable_variant_flatten_nested = false");
+            }
         } catch (AnalysisException e) {
             throw new DdlException(e.getMessage());
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 12cc7d6d756..bda47f87f11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -703,6 +703,10 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String DISABLE_INVERTED_INDEX_V1_FOR_VARIANT = 
"disable_inverted_index_v1_for_variant";
 
+    // disable variant flatten nested as session variable, default is true,
+    // which means disable variant flatten nested when create table
+    public static final String DISABLE_VARIANT_FLATTEN_NESTED = 
"disable_variant_flatten_nested";
+
     // CLOUD_VARIABLES_BEGIN
     public static final String CLOUD_CLUSTER = "cloud_cluster";
     public static final String DISABLE_EMPTY_PARTITION_PRUNE = 
"disable_empty_partition_prune";
@@ -1385,6 +1389,9 @@ public class SessionVariable implements Serializable, 
Writable {
     @VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT, 
needForward = true)
     private boolean disableInvertedIndexV1ForVaraint = true;
 
+    @VariableMgr.VarAttr(name = DISABLE_VARIANT_FLATTEN_NESTED, needForward = 
true)
+    private boolean disableVariantFlattenNested = true;
+
     public int getBeNumberForTest() {
         return beNumberForTest;
     }
@@ -5024,6 +5031,14 @@ public class SessionVariable implements Serializable, 
Writable {
         return disableInvertedIndexV1ForVaraint;
     }
 
+    public void setDisableVariantFlattenNested(boolean 
disableVariantFlattenNested) {
+        this.disableVariantFlattenNested = disableVariantFlattenNested;
+    }
+
+    public boolean getDisableVariantFlattenNested() {
+        return disableVariantFlattenNested;
+    }
+
     public void setProfileLevel(String profileLevel) {
         int profileLevelTmp = Integer.valueOf(profileLevel);
         if (profileLevelTmp < 1 || profileLevelTmp > 3) {
diff --git a/gensrc/thrift/AgentService.thrift 
b/gensrc/thrift/AgentService.thrift
index f807c0c89e7..e4a3a554429 100644
--- a/gensrc/thrift/AgentService.thrift
+++ b/gensrc/thrift/AgentService.thrift
@@ -48,7 +48,7 @@ struct TTabletSchema {
     // col unique id for row store column
     20: optional list<i32> row_store_col_cids
     21: optional i64 row_store_page_size = 16384
-    22: optional bool variant_enable_flatten_nested = false 
+    22: optional bool variant_enable_flatten_nested = false
     23: optional i64 storage_page_size = 65536
     24: optional i64 storage_dict_page_size = 262144
 }
diff --git a/regression-test/data/variant_p0/nested/load.out 
b/regression-test/data/variant_p0/nested/load.out
new file mode 100644
index 00000000000..d0cb9d65fae
Binary files /dev/null and b/regression-test/data/variant_p0/nested/load.out 
differ
diff --git a/regression-test/data/variant_p0/nested/sql/q01.out 
b/regression-test/data/variant_p0/nested/sql/q01.out
new file mode 100644
index 00000000000..ea77db963fe
Binary files /dev/null and b/regression-test/data/variant_p0/nested/sql/q01.out 
differ
diff --git a/regression-test/suites/variant_p0/delete_update.groovy 
b/regression-test/suites/variant_p0/delete_update.groovy
index 92da76ad3ce..dcae6c628bf 100644
--- a/regression-test/suites/variant_p0/delete_update.groovy
+++ b/regression-test/suites/variant_p0/delete_update.groovy
@@ -21,6 +21,7 @@ suite("regression_test_variant_delete_and_update", 
"variant_type"){
     // MOR
     def table_name = "var_delete_update"
     sql "DROP TABLE IF EXISTS ${table_name}"
+    sql """ set disable_variant_flatten_nested = false """
     sql """
         CREATE TABLE IF NOT EXISTS ${table_name} (
             k bigint,
diff --git a/regression-test/suites/variant_p0/nested.groovy 
b/regression-test/suites/variant_p0/nested.groovy
index 2ccbc82fdc4..9877f91f481 100644
--- a/regression-test/suites/variant_p0/nested.groovy
+++ b/regression-test/suites/variant_p0/nested.groovy
@@ -24,7 +24,7 @@ suite("regression_test_variant_nested", "p0"){
 
         def table_name = "var_nested"
         sql "DROP TABLE IF EXISTS ${table_name}"
-
+        sql "set disable_variant_flatten_nested = false"
         sql """
                 CREATE TABLE IF NOT EXISTS ${table_name} (
                     k bigint,
diff --git a/regression-test/suites/variant_p0/nested/load.groovy 
b/regression-test/suites/variant_p0/nested/load.groovy
new file mode 100644
index 00000000000..0fee39d4b3d
--- /dev/null
+++ b/regression-test/suites/variant_p0/nested/load.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// this test is used to test the load of nested array
+suite("variant_nested_type_load", "p0"){
+
+    try {
+
+        // create a table with conflict variant which insert same nested 
subcolumn and scalar subcolumn data 
+        def table_name = "var_nested_load_conflict"
+        sql "DROP TABLE IF EXISTS ${table_name}"
+        sql """set describe_extend_variant_column = true"""
+
+        // set disable_variant_flatten_nested = true to disable variant 
flatten nested which is default behavior
+        sql """ set disable_variant_flatten_nested = true """
+        test {
+            sql """
+                    CREATE TABLE IF NOT EXISTS ${table_name} (
+                        k bigint,
+                        v variant
+                    )
+                    DUPLICATE KEY(`k`)
+                    DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really 
compaction in conflict case
+                    properties("replication_num" = "1", 
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true");
+                """
+            exception "If you want to enable variant flatten nested, please 
set session variable"
+        }
+        
+
+        // set disable_variant_flatten_nested = false to enable variant 
flatten nested
+        sql """ set disable_variant_flatten_nested = false """
+        sql """
+                    CREATE TABLE IF NOT EXISTS ${table_name} (
+                        k bigint,
+                        v variant
+                    )
+                    DUPLICATE KEY(`k`)
+                    DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really 
compaction in conflict case
+                    properties("replication_num" = "1", 
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true");
+                """
+        sql """ insert into ${table_name} values (1, '{"nested": [{"a": 1, 
"c": 1.1}, {"b": "1"}]}'); """ 
+        
+        def desc_table = { tn ->
+            sql """ set describe_extend_variant_column = true """
+            sql """ select * from ${tn} order by k """
+            qt_sql_desc """ desc ${tn} """
+        }
+
+       def sql_select_batch = { tn ->
+            qt_sql_0 """select * from ${tn} order by k"""
+
+            qt_sql_1 """select v['nested']['a'] from ${tn} order by k"""
+            qt_sql_2 """select v['nested']['b'] from ${tn} order by k"""
+            qt_sql_3 """select v['nested']['c'] from ${tn} order by k"""
+
+            qt_sql_4 """select v['nested'] from ${tn} order by k"""
+        }
+
+        def sql_test_cast_to_array = { tn ->
+            // test cast to array<int> 
+            qt_sql_8 """select cast(v['nested']['a'] as array<int>), 
size(cast(v['nested']['a'] as array<int>)) from ${tn} order by k"""
+            qt_sql_9 """select cast(v['nested']['b'] as array<int>), 
size(cast(v['nested']['b'] as array<int>)) from ${tn} order by k"""
+            qt_sql_10 """select cast(v['nested']['c'] as array<int>), 
size(cast(v['nested']['c'] as array<int>)) from ${tn} order by k"""
+
+            // test cast to array<string> 
+            qt_sql_11 """select cast(v['nested']['a'] as array<string>), 
size(cast(v['nested']['a'] as array<string>)) from ${tn} order by k"""
+            qt_sql_12 """select cast(v['nested']['b'] as array<string>), 
size(cast(v['nested']['b'] as array<string>)) from ${tn} order by k"""
+            qt_sql_13 """select cast(v['nested']['c'] as array<string>), 
size(cast(v['nested']['c'] as array<string>)) from ${tn} order by k"""
+
+            // test cast to array<double> 
+            qt_sql_14 """select cast(v['nested']['a'] as array<double>), 
size(cast(v['nested']['a'] as array<double>)) from ${tn} order by k"""
+            qt_sql_15 """select cast(v['nested']['b'] as array<double>), 
size(cast(v['nested']['b'] as array<double>)) from ${tn} order by k"""
+            qt_sql_16 """select cast(v['nested']['c'] as array<double>), 
size(cast(v['nested']['c'] as array<double>)) from ${tn} order by k"""
+
+        }
+
+        def sql_test_cast_to_scalar = { tn ->
+            qt_sql_17 """select cast(v['nested']['a'] as int), 
cast(v['nested']['b'] as int), cast(v['nested']['c'] as int) from ${tn} order 
by k"""
+            qt_sql_18 """select cast(v['nested']['a'] as string), 
cast(v['nested']['b'] as string), cast(v['nested']['c'] as string) from ${tn} 
order by k"""
+            qt_sql_19 """select cast(v['nested']['a'] as double), 
cast(v['nested']['b'] as double), cast(v['nested']['c'] as double) from ${tn} 
order by k"""
+        }
+
+        /// insert a array of object for a, b, c 
+        // insert structure conflict in one row
+        //  a , b, c is Nested array,
+        def table_name_1 = "var_nested_load_no_conflict"
+        sql "DROP TABLE IF EXISTS ${table_name_1}"
+        sql """set describe_extend_variant_column = true"""
+        sql """
+                CREATE TABLE IF NOT EXISTS ${table_name_1} (
+                    k bigint,
+                    v variant
+                )
+                DUPLICATE KEY(`k`)
+                DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really 
compaction in conflict case
+                properties("replication_num" = "1", "disable_auto_compaction" 
= "false", "variant_enable_flatten_nested" = "true");
+            """
+        // insert a array of object for a, b, c first then insert structure 
conflict in one row
+        // insert structure conflict in one row
+        //  a , b, c is Nested array,
+        sql """
+            insert into ${table_name_1} values (1, '{"nested": [{"a": 1, "c": 
1.1}, {"b": "1"}]}'); 
+            """
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)
+        sql_test_cast_to_scalar(table_name_1)
+        // insert structure conflict in one row
+        test {
+            sql """
+                insert into ${table_name_1} values (2, '{"nested": {"a": 2.5, 
"b": "123.1"}}');
+                """
+            exception "Ambiguous paths"
+        }
+        // insert more different combination data for a, b, c
+        sql """
+            insert into ${table_name_1} values (3, '{"nested": [{"a": 2.5, 
"b": "123.1"}]}');
+            """
+        sql """
+            insert into ${table_name_1} values (4, '{"nested": [{"a": 2.5, 
"b": 123.1}]}');
+            """
+        sql """
+            insert into ${table_name_1} values (5, '{"nested": [{"a": 2.5, 
"c": "123.1"}, {"b": "123.1"}]}');
+            """
+        sql """
+            insert into ${table_name_1} values (6, '{"nested": [{"a": 2.5}, 
{"b": 123.1}]}');
+            """
+        sql """
+            insert into ${table_name_1} values (7, '{"nested": [{"a": 2.5}, 
{"c": 123.1}, {"b": "123.1"}]}');
+            """
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)
+        sql_test_cast_to_scalar(table_name_1)
+        // trigger and wait compaction
+        trigger_and_wait_compaction("${table_name_1}", "full")
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)    
+        sql_test_cast_to_scalar(table_name_1)
+
+        // drop table
+        sql """ drop table ${table_name_1} """
+        sql """ create table ${table_name_1} (k bigint, v variant) duplicate 
key(k) distributed by hash(k) buckets 1 properties("replication_num" = "1", 
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true") 
"""
+        // insert scalar data first then insert structure conflict in one row
+        sql """
+            insert into ${table_name_1} values (1, '{"nested": {"a": 2.5, "b": 
"123.1"}}');
+            """
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)
+        sql_test_cast_to_scalar(table_name_1)
+        // insert structure conflict in one row:  a array of object for a, b, c
+        test {
+            sql """
+                insert into ${table_name_1} values (2, '{"nested": [{"a": 2.5, 
"b": "123.1"}]}');
+                """
+            exception "Ambiguous paths"
+        }
+        // insert more different combination data for a, b, c in scalar
+        sql """
+            insert into ${table_name_1} values (3, '{"nested": {"a": 2.5, "b": 
123.1}}');
+            """
+        sql """
+            insert into ${table_name_1} values (4, '{"nested": {"a": 2.5, "c": 
"123.1"}}');
+            """
+        sql """
+            insert into ${table_name_1} values (5, '{"nested": {"a": 2.5, "c": 
123.1}}');
+            """
+        sql """
+            insert into ${table_name_1} values (6, '{"nested": {"a": 2.5, "c": 
"123.1"}}');
+            """
+        sql """
+            insert into ${table_name_1} values (7, '{"nested": {"a": 2.5, "b": 
"123.1", "c": 123.1}}');
+            """
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)
+        sql_test_cast_to_scalar(table_name_1)
+        // trigger and wait compaction
+        trigger_and_wait_compaction("${table_name_1}", "full")
+        sql_select_batch(table_name_1)
+        sql_test_cast_to_array(table_name_1)    
+        sql_test_cast_to_scalar(table_name_1)
+
+    } finally {
+    }
+
+}
diff --git a/regression-test/suites/variant_p0/nested/sql/q01.sql 
b/regression-test/suites/variant_p0/nested/sql/q01.sql
new file mode 100644
index 00000000000..71ee81428ed
--- /dev/null
+++ b/regression-test/suites/variant_p0/nested/sql/q01.sql
@@ -0,0 +1,13 @@
+-- TABLES: var_nested_load_conflict
+select v['nested']['a'] from var_nested_load_conflict order by k;
+select v['nested']['b'] from var_nested_load_conflict order by k;
+select v['nested']['c'] from var_nested_load_conflict order by k;
+select v['nested'] from var_nested_load_conflict order by k;
+
+select cast(v['nested']['a'] as array<int>), size(cast(v['nested']['a'] as 
array<int>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['b'] as array<int>), size(cast(v['nested']['b'] as 
array<int>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['c'] as array<int>), size(cast(v['nested']['c'] as 
array<int>)) from var_nested_load_conflict order by k;
+
+select cast(v['nested']['a'] as array<string>), size(cast(v['nested']['a'] as 
array<string>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['b'] as array<string>), size(cast(v['nested']['b'] as 
array<string>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['c'] as array<string>), size(cast(v['nested']['c'] as 
array<string>)) from var_nested_load_conflict order by k; 
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/nested2.groovy 
b/regression-test/suites/variant_p0/nested2.groovy
index 8d48fcfce9b..75d84a664c2 100644
--- a/regression-test/suites/variant_p0/nested2.groovy
+++ b/regression-test/suites/variant_p0/nested2.groovy
@@ -24,6 +24,7 @@ suite("variant_nested_type_conflict", "p0"){
         sql "DROP TABLE IF EXISTS ${table_name}"
         sql """set describe_extend_variant_column = true"""
 
+        sql """ set disable_variant_flatten_nested = false """
         sql """
                 CREATE TABLE IF NOT EXISTS ${table_name} (
                     k bigint,
diff --git 
a/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
 
b/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
index 5c9d85fb8ed..a8b78bdd258 100644
--- 
a/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
+++ 
b/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
@@ -57,6 +57,7 @@ suite("double_write_schema_change_with_variant", 
"nonConcurrent") {
 
     def table_name = "github_events"
     sql """DROP TABLE IF EXISTS ${table_name}"""
+    sql "set disable_variant_flatten_nested = false"
     sql """
         CREATE TABLE IF NOT EXISTS ${table_name} (
             k bigint,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [fix](variant) fix the reading core caused by inserting nested column and scalar column in variant sub-column (#53083)

Reply via email to