This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 958e9eefe49 [fix](variant) fix the reading core caused by inserting
nested column and scalar column in variant sub-column (#53083)
958e9eefe49 is described below
commit 958e9eefe49ebe613d8c9b930b3e0f6e8f887e9c
Author: amory <[email protected]>
AuthorDate: Thu Jul 24 14:31:09 2025 +0800
[fix](variant) fix the reading core caused by inserting nested column and
scalar column in variant sub-column (#53083)
This PR mainly fixes the following problem: if we create a table with the
property
```variant_enable_flatten_nested```
and then insert variant data such as
```'{"nested":{"a":"1"}}'``` and
```'{"nested":[{"a":1,"c":1.1},{"b":"1"}]}'```
(the same sub-column path once as a scalar and once as a nested array),
reading the data triggers a core dump,
so we should forbid this table property.
For old data, inserting data with a different structure will now fail with
an error like this:
```
mysql> insert into vs values (2, '{"nested":{"a":"1"}}');
Query OK, 1 row affected (0.22 sec)
{'label':'label_165a8209698c4391_988c6532615017c4', 'status':'VISIBLE',
'txnId':'1011'}
mysql> insert into vs values (1,
'{"nested":[{"a":1,"c":1.1},{"b":"1"}]}');
ERROR 1105 (HY000): errCode = 2, detailMessage =
(10.16.10.6)[INTERNAL_ERROR]tablet 1752145213719 failed on majority
backends: [DATA_QUALITY_ERROR]PStatus:
(10.16.10.6)[DATA_QUALITY_ERROR]Ambiguous paths: v.nested.a vs
v.nested.a with different nested part true vs false
```
---
be/src/cloud/cloud_meta_mgr.cpp | 13 +-
be/src/cloud/schema_cloud_dictionary_cache.cpp | 36 ++-
be/src/olap/base_tablet.cpp | 4 +-
be/src/olap/rowset/beta_rowset_writer.cpp | 7 +-
be/src/olap/rowset/beta_rowset_writer.h | 2 +-
.../rowset/segment_v2/hierarchical_data_reader.h | 6 +-
be/src/service/internal_service.cpp | 18 +-
be/src/vec/common/schema_util.cpp | 99 ++++++--
be/src/vec/common/schema_util.h | 17 +-
.../cloud/test_schema_cloud_dictionary_cache.cpp | 54 +++++
be/test/common/schema_util_test.cpp | 269 +++++++++++++++++++++
.../apache/doris/datasource/InternalCatalog.java | 6 +
.../java/org/apache/doris/qe/SessionVariable.java | 15 ++
gensrc/thrift/AgentService.thrift | 2 +-
regression-test/data/variant_p0/nested/load.out | Bin 0 -> 7411 bytes
regression-test/data/variant_p0/nested/sql/q01.out | Bin 0 -> 377 bytes
.../suites/variant_p0/delete_update.groovy | 1 +
regression-test/suites/variant_p0/nested.groovy | 2 +-
.../suites/variant_p0/nested/load.groovy | 198 +++++++++++++++
.../suites/variant_p0/nested/sql/q01.sql | 13 +
regression-test/suites/variant_p0/nested2.groovy | 1 +
.../test_double_write_when_schema_change.groovy | 1 +
22 files changed, 722 insertions(+), 42 deletions(-)
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index 9397a426bc5..024c16c3533 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1049,11 +1049,18 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta, const std::string&
// Replace schema dictionary keys based on the rowset's index ID to
maintain schema consistency.
CloudStorageEngine& engine =
ExecEnv::GetInstance()->storage_engine().to_cloud();
// if not enable dict cache, then directly return true to avoid refresh
- bool replaced =
+ Status replaced_st =
config::variant_use_cloud_schema_dict_cache
?
engine.get_schema_cloud_dictionary_cache().replace_schema_to_dict_keys(
rs_meta_pb.index_id(), req.mutable_rowset_meta())
- : true;
+ : Status::OK();
+ // if the replaced_st is not ok and alse not NotFound, then we need to
just return the replaced_st
+ VLOG_DEBUG << "replace schema to dict keys, replaced_st: " <<
replaced_st.to_string()
+ << ", replaced_st.is<ErrorCode::NOT_FOUND>(): "
+ << replaced_st.is<ErrorCode::NOT_FOUND>();
+ if (!replaced_st.ok() && !replaced_st.is<ErrorCode::NOT_FOUND>()) {
+ return replaced_st;
+ }
Status st = retry_rpc("commit rowset", req, &resp,
&MetaService_Stub::commit_rowset);
if (!st.ok() && resp.status().code() == MetaServiceCode::ALREADY_EXISTED) {
if (existed_rs_meta != nullptr && resp.has_existed_rowset_meta()) {
@@ -1067,7 +1074,7 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta&
rs_meta, const std::string&
// If dictionary replacement fails, it may indicate that the local schema
dictionary is outdated.
// Refreshing the dictionary here ensures that the rowset metadata is
updated with the latest schema definitions,
// which is critical for maintaining consistency between the rowset and
its corresponding schema.
- if (!replaced) {
+ if (replaced_st.is<ErrorCode::NOT_FOUND>()) {
RETURN_IF_ERROR(
engine.get_schema_cloud_dictionary_cache().refresh_dict(rs_meta_pb.index_id()));
}
diff --git a/be/src/cloud/schema_cloud_dictionary_cache.cpp
b/be/src/cloud/schema_cloud_dictionary_cache.cpp
index 25f0b232702..9fdde420ecb 100644
--- a/be/src/cloud/schema_cloud_dictionary_cache.cpp
+++ b/be/src/cloud/schema_cloud_dictionary_cache.cpp
@@ -19,6 +19,7 @@
#include <fmt/core.h>
#include <gen_cpp/olap_file.pb.h>
+#include <vec/common/schema_util.h>
#include <functional>
#include <memory>
@@ -62,6 +63,27 @@ SchemaCloudDictionarySPtr
SchemaCloudDictionaryCache::_lookup(int64_t index_id)
return dict;
}
+Status check_path_amibigus(const SchemaCloudDictionary& schema,
RowsetMetaCloudPB* rowset_meta) {
+ // if enable_variant_flatten_nested is false, then we don't need to check
path amibigus
+ if (!rowset_meta->tablet_schema().enable_variant_flatten_nested()) {
+ return Status::OK();
+ }
+ // try to get all the paths in the rowset meta
+ vectorized::PathsInData all_paths;
+ for (const auto& column : rowset_meta->tablet_schema().column()) {
+ vectorized::PathInData path_in_data;
+ path_in_data.from_protobuf(column.column_path_info());
+ all_paths.push_back(path_in_data);
+ }
+ // try to get all the paths in the schema dict
+ for (const auto& [_, column] : schema.column_dict()) {
+ vectorized::PathInData path_in_data;
+ path_in_data.from_protobuf(column.column_path_info());
+ all_paths.push_back(path_in_data);
+ }
+
RETURN_IF_ERROR(vectorized::schema_util::check_variant_has_no_ambiguous_paths(all_paths));
+ return Status::OK();
+}
/**
* Processes dictionary entries by matching items from the given item map.
* It maps items to their dictionary keys, then adds these keys to the rowset
metadata.
@@ -101,7 +123,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
return output;
};
- google::protobuf::RepeatedPtrField<ItemPB> none_ext_items;
+ google::protobuf::RepeatedPtrField<ItemPB> none_extracted_items;
std::unordered_map<std::string, int> reversed_dict;
for (const auto& [key, val] : item_dict) {
reversed_dict[serialize_fn(val)] = key;
@@ -110,7 +132,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
for (const auto& item : items) {
if (filter(item)) {
// Filter none extended items, mainly extended columns and
extended indexes
- *none_ext_items.Add() = item;
+ *none_extracted_items.Add() = item;
continue;
}
const std::string serialized_key = serialize_fn(item);
@@ -127,7 +149,7 @@ Status process_dictionary(SchemaCloudDictionary& dict,
}
// clear extended items to prevent writing them to fdb
if (result != nullptr) {
- result->Swap(&none_ext_items);
+ result->Swap(&none_extracted_items);
}
return Status::OK();
}
@@ -137,11 +159,15 @@ Status
SchemaCloudDictionaryCache::replace_schema_to_dict_keys(int64_t index_id,
if (!rowset_meta->has_variant_type_in_schema()) {
return Status::OK();
}
+ // first attempt to get dict from cache
auto dict = _lookup(index_id);
if (!dict) {
- g_schema_dict_cache_miss_count << 1;
- return Status::NotFound<false>("Not found dict {}", index_id);
+ // if not found the dict in cache, then refresh the dict from remote
meta service
+ RETURN_IF_ERROR(refresh_dict(index_id, &dict));
}
+ // here we should have the dict
+ DCHECK(dict);
+ RETURN_IF_ERROR(check_path_amibigus(*dict, rowset_meta));
auto* dict_list = rowset_meta->mutable_schema_dict_key_list();
// Process column dictionary: add keys for non-extended columns.
auto column_filter = [&](const doris::ColumnPB& col) -> bool { return
col.unique_id() >= 0; };
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index 6f1600c39ae..1ea9d8d860d 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -194,11 +194,13 @@ Status BaseTablet::update_by_least_common_schema(const
TabletSchemaSPtr& update_
CHECK(_max_version_schema->schema_version() >=
update_schema->schema_version());
TabletSchemaSPtr final_schema;
bool check_column_size = true;
+ VLOG_DEBUG << "dump _max_version_schema: " <<
_max_version_schema->dump_full_schema();
+ VLOG_DEBUG << "dump update_schema: " << update_schema->dump_full_schema();
RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(
{_max_version_schema, update_schema}, _max_version_schema,
final_schema,
check_column_size));
_max_version_schema = final_schema;
- VLOG_DEBUG << "dump updated tablet schema: " <<
final_schema->dump_structure();
+ VLOG_DEBUG << "dump updated tablet schema: " <<
final_schema->dump_full_schema();
return Status::OK();
}
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 56f1d16eecb..35139fa4ba9 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -874,13 +874,13 @@ int64_t BetaRowsetWriter::_num_seg() const {
// Eg. rowset schema: A(int), B(float), C(int), D(int)
// _tabelt->tablet_schema: A(bigint), B(double)
// => update_schema: A(bigint), B(double), C(int), D(int)
-void BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema)
{
+Status BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr
flush_schema) {
std::lock_guard<std::mutex> lock(*(_context.schema_lock));
TabletSchemaSPtr update_schema;
if (_context.merged_tablet_schema == nullptr) {
_context.merged_tablet_schema = _context.tablet_schema;
}
- static_cast<void>(vectorized::schema_util::get_least_common_schema(
+ RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(
{_context.merged_tablet_schema, flush_schema}, nullptr,
update_schema));
CHECK_GE(update_schema->num_columns(), flush_schema->num_columns())
<< "Rowset merge schema columns count is " <<
update_schema->num_columns()
@@ -889,6 +889,7 @@ void
BaseBetaRowsetWriter::update_rowset_schema(TabletSchemaSPtr flush_schema) {
<< " flush_schema: " << flush_schema->dump_structure();
_context.merged_tablet_schema.swap(update_schema);
VLOG_DEBUG << "dump rs schema: " <<
_context.tablet_schema->dump_structure();
+ return Status::OK();
}
Status BaseBetaRowsetWriter::_build_rowset_meta(RowsetMeta* rowset_meta, bool
check_segment_num) {
@@ -1106,7 +1107,7 @@ Status BaseBetaRowsetWriter::add_segment(uint32_t
segment_id, const SegmentStati
}
// tablet schema updated
if (flush_schema != nullptr) {
- update_rowset_schema(flush_schema);
+ RETURN_IF_ERROR(update_rowset_schema(flush_schema));
}
if (_context.mow_context != nullptr) {
// ensure that the segment file writing is complete
diff --git a/be/src/olap/rowset/beta_rowset_writer.h
b/be/src/olap/rowset/beta_rowset_writer.h
index ce89cad7a99..9cb72896f74 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -202,7 +202,7 @@ public:
}
private:
- void update_rowset_schema(TabletSchemaSPtr flush_schema);
+ Status update_rowset_schema(TabletSchemaSPtr flush_schema);
// build a tmp rowset for load segment to calc delete_bitmap
// for this segment
protected:
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index d74ba8d4f03..c7ab8499c64 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -143,7 +143,11 @@ private:
PathInData relative_path =
node.path.copy_pop_nfront(_path.get_parts().size());
if (node.path.has_nested_part()) {
- CHECK_EQ(node.data.type->get_primitive_type(),
PrimitiveType::TYPE_ARRAY);
+ if (node.data.type->get_primitive_type() !=
PrimitiveType::TYPE_ARRAY) {
+ return Status::InternalError(
+ "Meet none array column when flatten nested array,
path {}, type {}",
+ node.path.get_path(), node.data.type->get_name());
+ }
PathInData parent_path =
node.path.get_nested_prefix_path().copy_pop_nfront(
_path.get_parts().size());
nested_subcolumns[parent_path].emplace_back(relative_path,
column->get_ptr(),
diff --git a/be/src/service/internal_service.cpp
b/be/src/service/internal_service.cpp
index 2127e90e351..2c6bb59a53d 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -1205,8 +1205,13 @@ void
PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle
if (!schemas.empty() && st.ok()) {
// merge all
TabletSchemaSPtr merged_schema;
-
static_cast<void>(vectorized::schema_util::get_least_common_schema(schemas,
nullptr,
-
merged_schema));
+ st = vectorized::schema_util::get_least_common_schema(schemas,
nullptr,
+
merged_schema);
+ if (!st.ok()) {
+ LOG(WARNING) << "Failed to get least common schema: " <<
st.to_string();
+ st = Status::InternalError("Failed to get least common
schema: {}",
+ st.to_string());
+ }
VLOG_DEBUG << "dump schema:" <<
merged_schema->dump_structure();
merged_schema->reserve_extracted_columns();
merged_schema->to_schema_pb(response->mutable_merged_schema());
@@ -1242,8 +1247,13 @@ void
PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle
if (!tablet_schemas.empty()) {
// merge all
TabletSchemaSPtr merged_schema;
-
static_cast<void>(vectorized::schema_util::get_least_common_schema(
- tablet_schemas, nullptr, merged_schema));
+ st =
vectorized::schema_util::get_least_common_schema(tablet_schemas, nullptr,
+
merged_schema);
+ if (!st.ok()) {
+ LOG(WARNING) << "Failed to get least common schema: "
<< st.to_string();
+ st = Status::InternalError("Failed to get least common
schema: {}",
+ st.to_string());
+ }
merged_schema->to_schema_pb(response->mutable_merged_schema());
VLOG_DEBUG << "dump schema:" <<
merged_schema->dump_structure();
}
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index 51be756cf94..044f6527e72 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -238,10 +238,67 @@ TabletColumn get_column_by_type(const
vectorized::DataTypePtr& data_type, const
return result;
}
-void update_least_schema_internal(const std::map<PathInData, DataTypes>&
subcolumns_types,
- TabletSchemaSPtr& common_schema, bool
update_sparse_column,
- int32_t variant_col_unique_id,
- std::set<PathInData>* path_set = nullptr) {
+// check if two paths which same prefix have different structure
+static bool has_different_structure_in_same_path(const PathInData::Parts& lhs,
+ const PathInData::Parts& rhs)
{
+ if (lhs.size() != rhs.size()) {
+ return false; // different size means different structure
+ }
+ // Since we group by path string, lhs and rhs must have the same size and
keys
+ // We only need to check if they have different nested structure
+ for (size_t i = 0; i < lhs.size(); ++i) {
+ if (lhs[i] != rhs[i]) {
+ VLOG_DEBUG << fmt::format(
+ "Check different structure: {} vs {}, lhs[i].is_nested:
{}, rhs[i].is_nested: "
+ "{}",
+ lhs[i].key, rhs[i].key, lhs[i].is_nested,
rhs[i].is_nested);
+ return true;
+ }
+ }
+ return false;
+}
+
+Status check_variant_has_no_ambiguous_paths(const PathsInData& tuple_paths) {
+ // Group paths by their string representation to reduce comparisons
+ std::unordered_map<std::string, std::vector<size_t>> path_groups;
+
+ for (size_t i = 0; i < tuple_paths.size(); ++i) {
+ // same path should have same structure, so we group them by path
+ path_groups[tuple_paths[i].get_path()].push_back(i);
+ // print part of tuple_paths[i]
+ VLOG_DEBUG << "tuple_paths[i]: " << tuple_paths[i].get_path();
+ for (const auto& part : tuple_paths[i].get_parts()) {
+ VLOG_DEBUG << "part: " << part.key << ", is_nested: " <<
part.is_nested;
+ }
+ }
+
+ // Only compare paths within the same group
+ for (const auto& [path_str, indices] : path_groups) {
+ if (indices.size() <= 1) {
+ continue; // No conflicts possible
+ }
+
+ // Compare all pairs within this group
+ for (size_t i = 0; i < indices.size(); ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ if
(has_different_structure_in_same_path(tuple_paths[indices[i]].get_parts(),
+
tuple_paths[indices[j]].get_parts())) {
+ return Status::DataQualityError(
+ "Ambiguous paths: {} vs {} with different nested
part {} vs {}",
+ tuple_paths[indices[i]].get_path(),
tuple_paths[indices[j]].get_path(),
+ tuple_paths[indices[i]].has_nested_part(),
+ tuple_paths[indices[j]].has_nested_part());
+ }
+ }
+ }
+ }
+ return Status::OK();
+}
+
+Status update_least_schema_internal(const std::map<PathInData, DataTypes>&
subcolumns_types,
+ TabletSchemaSPtr& common_schema, bool
update_sparse_column,
+ int32_t variant_col_unique_id,
+ std::set<PathInData>* path_set = nullptr) {
PathsInData tuple_paths;
DataTypes tuple_types;
CHECK(common_schema.use_count() == 1);
@@ -292,13 +349,18 @@ void update_least_schema_internal(const
std::map<PathInData, DataTypes>& subcolu
path_set->insert(tuple_paths[i]);
}
}
+ return Status::OK();
}
-void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
- TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
- std::set<PathInData>* path_set) {
+Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
+ TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
+ std::set<PathInData>* path_set) {
// Types of subcolumns by path from all tuples.
std::map<PathInData, DataTypes> subcolumns_types;
+
+ // Collect all paths first to enable batch checking
+ std::vector<PathInData> all_paths;
+
for (const TabletSchemaSPtr& schema : schemas) {
for (const TabletColumnPtr& col : schema->columns()) {
// Get subcolumns of this variant
@@ -306,9 +368,14 @@ void update_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
col->parent_unique_id() == variant_col_unique_id) {
subcolumns_types[*col->path_info_ptr()].push_back(
DataTypeFactory::instance().create_data_type(*col,
col->is_nullable()));
+ all_paths.push_back(*col->path_info_ptr());
}
}
}
+
+ // Batch check for conflicts
+ RETURN_IF_ERROR(check_variant_has_no_ambiguous_paths(all_paths));
+
for (const TabletSchemaSPtr& schema : schemas) {
if (schema->field_index(variant_col_unique_id) == -1) {
// maybe dropped
@@ -326,13 +393,13 @@ void update_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
}
}
}
- update_least_schema_internal(subcolumns_types, common_schema, false,
variant_col_unique_id,
- path_set);
+ return update_least_schema_internal(subcolumns_types, common_schema, false,
+ variant_col_unique_id, path_set);
}
-void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
- TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
- const std::set<PathInData>& path_set) {
+Status update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
+ TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
+ const std::set<PathInData>& path_set) {
// Types of subcolumns by path from all tuples.
std::map<PathInData, DataTypes> subcolumns_types;
for (const TabletSchemaSPtr& schema : schemas) {
@@ -351,7 +418,8 @@ void update_least_sparse_column(const
std::vector<TabletSchemaSPtr>& schemas,
}
}
}
- update_least_schema_internal(subcolumns_types, common_schema, true,
variant_col_unique_id);
+ return update_least_schema_internal(subcolumns_types, common_schema, true,
+ variant_col_unique_id);
}
void inherit_column_attributes(const TabletColumn& source, TabletColumn&
target,
@@ -406,7 +474,6 @@ Status get_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
const TabletSchemaSPtr& base_schema,
TabletSchemaSPtr& output_schema,
bool check_schema_size) {
std::vector<int32_t> variant_column_unique_id;
-
// Construct a schema excluding the extracted columns and gather unique
identifiers for variants.
// Ensure that the output schema also excludes these extracted columns.
This approach prevents
// duplicated paths following the update_least_common_schema process.
@@ -453,9 +520,9 @@ Status get_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
std::set<PathInData> path_set;
// 1. cast extracted column to common type
// path set is used to record the paths of those sparse columns that
have been merged into the extracted columns, eg: v:b
- update_least_common_schema(schemas, output_schema, unique_id,
&path_set);
+ RETURN_IF_ERROR(update_least_common_schema(schemas, output_schema,
unique_id, &path_set));
// 2. cast sparse column to common type, exclude the columns from the
path set
- update_least_sparse_column(schemas, output_schema, unique_id,
path_set);
+ RETURN_IF_ERROR(update_least_sparse_column(schemas, output_schema,
unique_id, path_set));
}
inherit_column_attributes(output_schema);
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 08ca16c6900..565400b2d38 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -87,6 +87,11 @@ Status parse_variant_columns(Block& block, const
std::vector<int>& variant_pos,
const ParseConfig& config);
Status encode_variant_sparse_subcolumns(ColumnVariant& column);
+// check if the tuple_paths has ambiguous paths
+// situation:
+// throw exception if there exists a prefix with matched names, but not
matched structure (is Nested, number of dimensions).
+Status check_variant_has_no_ambiguous_paths(const std::vector<PathInData>&
paths);
+
// Pick the tablet schema with the highest schema version as the reference.
// Then update all variant columns to there least common types.
// Return the final merged schema as common schema.
@@ -97,13 +102,13 @@ Status get_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
// Get least common types for extracted columns which has Path info,
// with a speicified variant column's unique id
-void update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
- TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
- std::unordered_set<PathInData,
PathInData::Hash>* path_set);
+Status update_least_common_schema(const std::vector<TabletSchemaSPtr>& schemas,
+ TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
+ std::unordered_set<PathInData,
PathInData::Hash>* path_set);
-void update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
- TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
- const std::unordered_set<PathInData,
PathInData::Hash>& path_set);
+Status update_least_sparse_column(const std::vector<TabletSchemaSPtr>& schemas,
+ TabletSchemaSPtr& common_schema, int32_t
variant_col_unique_id,
+ const std::unordered_set<PathInData,
PathInData::Hash>& path_set);
// inherit attributes like index/agg info from it's parent column
void inherit_column_attributes(TabletSchemaSPtr& schema);
diff --git a/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
b/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
index 3d05eb67e45..0fc4fd0c3f5 100644
--- a/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
+++ b/be/test/cloud/test_schema_cloud_dictionary_cache.cpp
@@ -15,9 +15,11 @@
// specific language governing permissions and limitations
// under the License.
+#include "cloud/schema_cloud_dictionary_cache.cpp"
#include "cloud/schema_cloud_dictionary_cache.h"
#include "gen_cpp/olap_file.pb.h"
#include "gtest/gtest.h"
+#include "vec/json/path_in_data.h"
namespace doris {
@@ -175,4 +177,56 @@ TEST(SchemaCloudDictionaryCacheTest,
ReplaceDictKeysToSchema_RefreshFailure) {
EXPECT_FALSE(st.ok());
}
+// Test case 5: replace_schema_to_dict_keys with
tablet_schema.enable_variant_flatten_nested = true
+TEST(SchemaCloudDictionaryCacheTest,
ProcessDictionary_VariantPathConflict_Throws) {
+ SchemaCloudDictionarySPtr dict = std::make_shared<SchemaCloudDictionary>();
+ // construct two variant columns with same unique_id but different
path_info
+ auto& col_dict = *dict->mutable_column_dict();
+ ColumnPB* col1 = &(col_dict)[101];
+ col1->set_unique_id(101);
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("v", false).append("nested", true).append("a", false);
+ vectorized::PathInData path_in_data1 = builder1.build();
+ segment_v2::ColumnPathInfo path_info1;
+ path_in_data1.to_protobuf(&path_info1, 0);
+ col1->mutable_column_path_info()->CopyFrom(path_info1);
+ {
+ RowsetMetaCloudPB rs_meta;
+ rs_meta.set_has_variant_type_in_schema(true);
+ auto* schema = rs_meta.mutable_tablet_schema();
+ schema->set_enable_variant_flatten_nested(true);
+ // add two columns with same key but different is_nested value
+ auto* col_schema1 = schema->add_column();
+ col_schema1->set_unique_id(101);
+ // create pathIndata with same key but different is_nested value
+ vectorized::PathInDataBuilder builder3;
+ builder3.append("v", false).append("nested", false).append("a", false);
+ vectorized::PathInData path_in_data3 = builder3.build();
+ segment_v2::ColumnPathInfo path_info3;
+ path_in_data3.to_protobuf(&path_info3, 0);
+ col_schema1->mutable_column_path_info()->CopyFrom(path_info3);
+ auto st = check_path_amibigus(*dict, &rs_meta);
+ EXPECT_FALSE(st.ok());
+ EXPECT_EQ(st.code(), TStatusCode::DATA_QUALITY_ERROR);
+ }
+
+ {
+ RowsetMetaCloudPB rs_meta;
+ rs_meta.set_has_variant_type_in_schema(true);
+ auto* schema = rs_meta.mutable_tablet_schema();
+ // add two columns with same key but same is_nested value
+ auto* col_schema3 = schema->add_column();
+ col_schema3->set_unique_id(101);
+ vectorized::PathInDataBuilder builder5;
+ builder5.append("v", false).append("nested", true).append("a", false);
+ vectorized::PathInData path_in_data5 = builder5.build();
+ segment_v2::ColumnPathInfo path_info5;
+ path_in_data5.to_protobuf(&path_info5, 0);
+ col_schema3->mutable_column_path_info()->CopyFrom(path_info5);
+ // assert no exception
+ auto st = check_path_amibigus(*dict, &rs_meta);
+ EXPECT_TRUE(st.ok()) << st.to_string();
+ }
+}
+
} // namespace doris
\ No newline at end of file
diff --git a/be/test/common/schema_util_test.cpp
b/be/test/common/schema_util_test.cpp
index fb8b23c10cb..743db751dc1 100644
--- a/be/test/common/schema_util_test.cpp
+++ b/be/test/common/schema_util_test.cpp
@@ -118,4 +118,273 @@ TEST_F(SchemaUtilTest, inherit_column_attributes) {
}
}
+// Test has_different_structure_in_same_path function indirectly through
check_variant_has_no_ambiguous_paths
+TEST_F(SchemaUtilTest, has_different_structure_in_same_path_indirect) {
+ // Test case 1: Same structure and same length - should not detect
ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", false).append("c", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false).append("c", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 2: Different keys at same position - should not detect
ambiguity (different keys)
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", false).append("c", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("d", false).append("c", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 3: Same keys but different nested structure - should detect
ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", true);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_FALSE(status.ok());
+ EXPECT_TRUE(status.to_string().find("Ambiguous paths") !=
std::string::npos);
+ }
+
+ // Test case 4: Same keys but different anonymous array levels - should
detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", true).append("b", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_FALSE(status.ok());
+ EXPECT_TRUE(status.to_string().find("Ambiguous paths") !=
std::string::npos);
+ }
+
+ // Test case 5: Same keys but different nested and anonymous levels -
should detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", true).append("b", true);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_FALSE(status.ok());
+ EXPECT_TRUE(status.to_string().find("Ambiguous paths") !=
std::string::npos);
+ }
+
+ // Test case 6: Different lengths - should not detect ambiguity (new
behavior: only check same length paths)
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", false).append("c", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 7: Different lengths with structure difference - should not
detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", true).append("c", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a", false).append("b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 8: Complex nested structure difference with same length -
should detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("user", false).append("address",
true).append("street", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("user", false).append("address",
false).append("street", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_FALSE(status.ok());
+ EXPECT_TRUE(status.to_string().find("Ambiguous paths") !=
std::string::npos);
+ }
+
+ // Test case 9: Multiple paths with different lengths - should not detect
ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("config", false).append("database",
false).append("host", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("config", false).append("database", false);
+ paths.emplace_back(builder2.build());
+
+ vectorized::PathInDataBuilder builder3;
+ builder3.append("config", false);
+ paths.emplace_back(builder3.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 10: Empty paths - should not detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 11: Single path - should not detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("single", false).append("path", false);
+ paths.emplace_back(builder1.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 12: a dotted key ("a.b" as one part) next to a nested path (a -> b), e.g.
+ // '{"a.b": "UPPER CASE", "a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}'
+ // - should not detect ambiguity
+ {
+ vectorized::PathsInData paths;
+ vectorized::PathInDataBuilder builder1;
+ builder1.append("a", false).append("b", false);
+ paths.emplace_back(builder1.build());
+
+ vectorized::PathInDataBuilder builder2;
+ builder2.append("a.b", false);
+ paths.emplace_back(builder2.build());
+
+ auto status =
vectorized::schema_util::check_variant_has_no_ambiguous_paths(paths);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+}
+
+// Test check_path_conflicts_with_existing function indirectly through
update_least_common_schema
+TEST_F(SchemaUtilTest, check_path_conflicts_with_existing) {
+ // Test case 1: No conflicts - should succeed
+ {
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+ // Create a variant column
+ construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001,
"v1_index", 1,
+ "VARIANT", "v1", IndexType::INVERTED);
+
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ tablet_schema->init_from_pb(schema_pb);
+ std::vector<TabletColumn> subcolumns;
+
+ // Add subcolumns with different paths
+ construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_STRING,
1, "v1.name",
+ &subcolumns);
+ construct_subcolumn(tablet_schema, FieldType::OLAP_FIELD_TYPE_INT, 1,
"v1.age",
+ &subcolumns);
+
+ std::vector<TabletSchemaSPtr> schemas = {tablet_schema};
+ TabletSchemaSPtr output_schema;
+
+ auto status =
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+
output_schema, false);
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 2: Placeholder for same path but different structure - no
+ // conflicting subcolumns are added yet, so this should succeed
+ {
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+ // Create a variant column
+ construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001,
"v1_index", 1,
+ "VARIANT", "v1", IndexType::INVERTED);
+
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ tablet_schema->init_from_pb(schema_pb);
+
+ // Add subcolumns with same path but different structure
+ // This would require creating paths with different nested structure
+ // For now, we'll test the basic functionality
+
+ std::vector<TabletSchemaSPtr> schemas = {tablet_schema};
+ TabletSchemaSPtr output_schema;
+
+ auto status =
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+
output_schema, false);
+ // This should succeed since we don't have conflicting paths in this
simple case
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+
+ // Test case 3: Multiple schemas sharing a path with different types - should succeed
+ {
+ // Create first schema
+ TabletSchemaPB schema_pb1;
+ schema_pb1.set_keys_type(KeysType::DUP_KEYS);
+ construct_column(schema_pb1.add_column(), schema_pb1.add_index(),
10001, "v1_index", 1,
+ "VARIANT", "v1", IndexType::INVERTED);
+
+ TabletSchemaSPtr tablet_schema1 = std::make_shared<TabletSchema>();
+ tablet_schema1->init_from_pb(schema_pb1);
+ std::vector<TabletColumn> subcolumns;
+ construct_subcolumn(tablet_schema1, FieldType::OLAP_FIELD_TYPE_STRING,
1, "v1.address",
+ &subcolumns);
+
+ // Create second schema with same path but a different type (INT instead of STRING)
+ TabletSchemaPB schema_pb2;
+ schema_pb2.set_keys_type(KeysType::DUP_KEYS);
+ construct_column(schema_pb2.add_column(), schema_pb2.add_index(),
10001, "v1_index", 1,
+ "VARIANT", "v1", IndexType::INVERTED);
+
+ TabletSchemaSPtr tablet_schema2 = std::make_shared<TabletSchema>();
+ tablet_schema2->init_from_pb(schema_pb2);
+ std::vector<TabletColumn> subcolumns2;
+ construct_subcolumn(tablet_schema2, FieldType::OLAP_FIELD_TYPE_INT, 1,
"v1.address",
+ &subcolumns2);
+
+ std::vector<TabletSchemaSPtr> schemas = {tablet_schema1,
tablet_schema2};
+ TabletSchemaSPtr output_schema;
+
+ auto status =
vectorized::schema_util::get_least_common_schema(schemas, nullptr,
+
output_schema, false);
+ // This should succeed since the paths are the same and we're just
checking for structure conflicts
+ EXPECT_TRUE(status.ok()) << status.to_string();
+ }
+}
+
} // namespace doris
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index a12c74b04ec..38d41da0480 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -2453,6 +2453,12 @@ public class InternalCatalog implements
CatalogIf<Database> {
boolean variantEnableFlattenNested = false;
try {
variantEnableFlattenNested =
PropertyAnalyzer.analyzeVariantFlattenNested(properties);
+ // Reject table property variant_enable_flatten_nested = true while
+ // session variable disable_variant_flatten_nested = true.
+ if (ctx.getSessionVariable().getDisableVariantFlattenNested() &&
variantEnableFlattenNested) {
+ throw new DdlException("If you want to enable variant flatten
nested, "
+ + "please set session variable:
disable_variant_flatten_nested = false");
+ }
} catch (AnalysisException e) {
throw new DdlException(e.getMessage());
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 12cc7d6d756..bda47f87f11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -703,6 +703,10 @@ public class SessionVariable implements Serializable,
Writable {
public static final String DISABLE_INVERTED_INDEX_V1_FOR_VARIANT =
"disable_inverted_index_v1_for_variant";
+ // Session variable that disables variant flatten nested; default is true,
+ // which means variant flatten nested cannot be enabled when creating a table
+ public static final String DISABLE_VARIANT_FLATTEN_NESTED =
"disable_variant_flatten_nested";
+
// CLOUD_VARIABLES_BEGIN
public static final String CLOUD_CLUSTER = "cloud_cluster";
public static final String DISABLE_EMPTY_PARTITION_PRUNE =
"disable_empty_partition_prune";
@@ -1385,6 +1389,9 @@ public class SessionVariable implements Serializable,
Writable {
@VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT,
needForward = true)
private boolean disableInvertedIndexV1ForVaraint = true;
+ @VariableMgr.VarAttr(name = DISABLE_VARIANT_FLATTEN_NESTED, needForward =
true)
+ private boolean disableVariantFlattenNested = true;
+
public int getBeNumberForTest() {
return beNumberForTest;
}
@@ -5024,6 +5031,14 @@ public class SessionVariable implements Serializable,
Writable {
return disableInvertedIndexV1ForVaraint;
}
+ public void setDisableVariantFlattenNested(boolean
disableVariantFlattenNested) {
+ this.disableVariantFlattenNested = disableVariantFlattenNested;
+ }
+
+ public boolean getDisableVariantFlattenNested() {
+ return disableVariantFlattenNested;
+ }
+
public void setProfileLevel(String profileLevel) {
int profileLevelTmp = Integer.valueOf(profileLevel);
if (profileLevelTmp < 1 || profileLevelTmp > 3) {
diff --git a/gensrc/thrift/AgentService.thrift
b/gensrc/thrift/AgentService.thrift
index f807c0c89e7..e4a3a554429 100644
--- a/gensrc/thrift/AgentService.thrift
+++ b/gensrc/thrift/AgentService.thrift
@@ -48,7 +48,7 @@ struct TTabletSchema {
// col unique id for row store column
20: optional list<i32> row_store_col_cids
21: optional i64 row_store_page_size = 16384
- 22: optional bool variant_enable_flatten_nested = false
+ 22: optional bool variant_enable_flatten_nested = false
23: optional i64 storage_page_size = 65536
24: optional i64 storage_dict_page_size = 262144
}
diff --git a/regression-test/data/variant_p0/nested/load.out
b/regression-test/data/variant_p0/nested/load.out
new file mode 100644
index 00000000000..d0cb9d65fae
Binary files /dev/null and b/regression-test/data/variant_p0/nested/load.out
differ
diff --git a/regression-test/data/variant_p0/nested/sql/q01.out
b/regression-test/data/variant_p0/nested/sql/q01.out
new file mode 100644
index 00000000000..ea77db963fe
Binary files /dev/null and b/regression-test/data/variant_p0/nested/sql/q01.out
differ
diff --git a/regression-test/suites/variant_p0/delete_update.groovy
b/regression-test/suites/variant_p0/delete_update.groovy
index 92da76ad3ce..dcae6c628bf 100644
--- a/regression-test/suites/variant_p0/delete_update.groovy
+++ b/regression-test/suites/variant_p0/delete_update.groovy
@@ -21,6 +21,7 @@ suite("regression_test_variant_delete_and_update",
"variant_type"){
// MOR
def table_name = "var_delete_update"
sql "DROP TABLE IF EXISTS ${table_name}"
+ sql """ set disable_variant_flatten_nested = false """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
diff --git a/regression-test/suites/variant_p0/nested.groovy
b/regression-test/suites/variant_p0/nested.groovy
index 2ccbc82fdc4..9877f91f481 100644
--- a/regression-test/suites/variant_p0/nested.groovy
+++ b/regression-test/suites/variant_p0/nested.groovy
@@ -24,7 +24,7 @@ suite("regression_test_variant_nested", "p0"){
def table_name = "var_nested"
sql "DROP TABLE IF EXISTS ${table_name}"
-
+ sql "set disable_variant_flatten_nested = false"
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
diff --git a/regression-test/suites/variant_p0/nested/load.groovy
b/regression-test/suites/variant_p0/nested/load.groovy
new file mode 100644
index 00000000000..0fee39d4b3d
--- /dev/null
+++ b/regression-test/suites/variant_p0/nested/load.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// this test is used to test the load of nested array
+suite("variant_nested_type_load", "p0"){
+
+ try {
+
+ // create a table for the conflicting-variant case, where the same
+ // subcolumn is inserted as both nested and scalar data
+ def table_name = "var_nested_load_conflict"
+ sql "DROP TABLE IF EXISTS ${table_name}"
+ sql """set describe_extend_variant_column = true"""
+
+ // set disable_variant_flatten_nested = true to disable variant
+ // flatten nested, which is the default behavior
+ sql """ set disable_variant_flatten_nested = true """
+ test {
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name} (
+ k bigint,
+ v variant
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really
compaction in conflict case
+ properties("replication_num" = "1",
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true");
+ """
+ exception "If you want to enable variant flatten nested, please
set session variable"
+ }
+
+
+ // set disable_variant_flatten_nested = false to enable variant
flatten nested
+ sql """ set disable_variant_flatten_nested = false """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name} (
+ k bigint,
+ v variant
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really
compaction in conflict case
+ properties("replication_num" = "1",
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true");
+ """
+ sql """ insert into ${table_name} values (1, '{"nested": [{"a": 1,
"c": 1.1}, {"b": "1"}]}'); """
+
+ def desc_table = { tn ->
+ sql """ set describe_extend_variant_column = true """
+ sql """ select * from ${tn} order by k """
+ qt_sql_desc """ desc ${tn} """
+ }
+
+ def sql_select_batch = { tn ->
+ qt_sql_0 """select * from ${tn} order by k"""
+
+ qt_sql_1 """select v['nested']['a'] from ${tn} order by k"""
+ qt_sql_2 """select v['nested']['b'] from ${tn} order by k"""
+ qt_sql_3 """select v['nested']['c'] from ${tn} order by k"""
+
+ qt_sql_4 """select v['nested'] from ${tn} order by k"""
+ }
+
+ def sql_test_cast_to_array = { tn ->
+ // test cast to array<int>
+ qt_sql_8 """select cast(v['nested']['a'] as array<int>),
size(cast(v['nested']['a'] as array<int>)) from ${tn} order by k"""
+ qt_sql_9 """select cast(v['nested']['b'] as array<int>),
size(cast(v['nested']['b'] as array<int>)) from ${tn} order by k"""
+ qt_sql_10 """select cast(v['nested']['c'] as array<int>),
size(cast(v['nested']['c'] as array<int>)) from ${tn} order by k"""
+
+ // test cast to array<string>
+ qt_sql_11 """select cast(v['nested']['a'] as array<string>),
size(cast(v['nested']['a'] as array<string>)) from ${tn} order by k"""
+ qt_sql_12 """select cast(v['nested']['b'] as array<string>),
size(cast(v['nested']['b'] as array<string>)) from ${tn} order by k"""
+ qt_sql_13 """select cast(v['nested']['c'] as array<string>),
size(cast(v['nested']['c'] as array<string>)) from ${tn} order by k"""
+
+ // test cast to array<double>
+ qt_sql_14 """select cast(v['nested']['a'] as array<double>),
size(cast(v['nested']['a'] as array<double>)) from ${tn} order by k"""
+ qt_sql_15 """select cast(v['nested']['b'] as array<double>),
size(cast(v['nested']['b'] as array<double>)) from ${tn} order by k"""
+ qt_sql_16 """select cast(v['nested']['c'] as array<double>),
size(cast(v['nested']['c'] as array<double>)) from ${tn} order by k"""
+
+ }
+
+ def sql_test_cast_to_scalar = { tn ->
+ qt_sql_17 """select cast(v['nested']['a'] as int),
cast(v['nested']['b'] as int), cast(v['nested']['c'] as int) from ${tn} order
by k"""
+ qt_sql_18 """select cast(v['nested']['a'] as string),
cast(v['nested']['b'] as string), cast(v['nested']['c'] as string) from ${tn}
order by k"""
+ qt_sql_19 """select cast(v['nested']['a'] as double),
cast(v['nested']['b'] as double), cast(v['nested']['c'] as double) from ${tn}
order by k"""
+ }
+
+ // insert an array of objects for a, b, c
+ // insert structure conflict in one row
+ // a, b, c are Nested arrays
+ def table_name_1 = "var_nested_load_no_conflict"
+ sql "DROP TABLE IF EXISTS ${table_name_1}"
+ sql """set describe_extend_variant_column = true"""
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name_1} (
+ k bigint,
+ v variant
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1 -- 1 bucket make really
compaction in conflict case
+ properties("replication_num" = "1", "disable_auto_compaction"
= "false", "variant_enable_flatten_nested" = "true");
+ """
+ // insert a array of object for a, b, c first then insert structure
conflict in one row
+ // insert structure conflict in one row
+ // a , b, c is Nested array,
+ sql """
+ insert into ${table_name_1} values (1, '{"nested": [{"a": 1, "c":
1.1}, {"b": "1"}]}');
+ """
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+ // insert structure conflict in one row
+ test {
+ sql """
+ insert into ${table_name_1} values (2, '{"nested": {"a": 2.5,
"b": "123.1"}}');
+ """
+ exception "Ambiguous paths"
+ }
+ // insert more different combination data for a, b, c
+ sql """
+ insert into ${table_name_1} values (3, '{"nested": [{"a": 2.5,
"b": "123.1"}]}');
+ """
+ sql """
+ insert into ${table_name_1} values (4, '{"nested": [{"a": 2.5,
"b": 123.1}]}');
+ """
+ sql """
+ insert into ${table_name_1} values (5, '{"nested": [{"a": 2.5,
"c": "123.1"}, {"b": "123.1"}]}');
+ """
+ sql """
+ insert into ${table_name_1} values (6, '{"nested": [{"a": 2.5},
{"b": 123.1}]}');
+ """
+ sql """
+ insert into ${table_name_1} values (7, '{"nested": [{"a": 2.5},
{"c": 123.1}, {"b": "123.1"}]}');
+ """
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+ // trigger and wait compaction
+ trigger_and_wait_compaction("${table_name_1}", "full")
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+
+ // drop table
+ sql """ drop table ${table_name_1} """
+ sql """ create table ${table_name_1} (k bigint, v variant) duplicate
key(k) distributed by hash(k) buckets 1 properties("replication_num" = "1",
"disable_auto_compaction" = "false", "variant_enable_flatten_nested" = "true")
"""
+ // insert scalar data first then insert structure conflict in one row
+ sql """
+ insert into ${table_name_1} values (1, '{"nested": {"a": 2.5, "b":
"123.1"}}');
+ """
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+ // insert structure conflict in one row: a array of object for a, b, c
+ test {
+ sql """
+ insert into ${table_name_1} values (2, '{"nested": [{"a": 2.5,
"b": "123.1"}]}');
+ """
+ exception "Ambiguous paths"
+ }
+ // insert more different combination data for a, b, c in scalar
+ sql """
+ insert into ${table_name_1} values (3, '{"nested": {"a": 2.5, "b":
123.1}}');
+ """
+ sql """
+ insert into ${table_name_1} values (4, '{"nested": {"a": 2.5, "c":
"123.1"}}');
+ """
+ sql """
+ insert into ${table_name_1} values (5, '{"nested": {"a": 2.5, "c":
123.1}}');
+ """
+ sql """
+ insert into ${table_name_1} values (6, '{"nested": {"a": 2.5, "c":
"123.1"}}');
+ """
+ sql """
+ insert into ${table_name_1} values (7, '{"nested": {"a": 2.5, "b":
"123.1", "c": 123.1}}');
+ """
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+ // trigger and wait compaction
+ trigger_and_wait_compaction("${table_name_1}", "full")
+ sql_select_batch(table_name_1)
+ sql_test_cast_to_array(table_name_1)
+ sql_test_cast_to_scalar(table_name_1)
+
+ } finally {
+ }
+
+}
diff --git a/regression-test/suites/variant_p0/nested/sql/q01.sql
b/regression-test/suites/variant_p0/nested/sql/q01.sql
new file mode 100644
index 00000000000..71ee81428ed
--- /dev/null
+++ b/regression-test/suites/variant_p0/nested/sql/q01.sql
@@ -0,0 +1,13 @@
+-- TABLES: var_nested_load_conflict
+select v['nested']['a'] from var_nested_load_conflict order by k;
+select v['nested']['b'] from var_nested_load_conflict order by k;
+select v['nested']['c'] from var_nested_load_conflict order by k;
+select v['nested'] from var_nested_load_conflict order by k;
+
+select cast(v['nested']['a'] as array<int>), size(cast(v['nested']['a'] as
array<int>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['b'] as array<int>), size(cast(v['nested']['b'] as
array<int>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['c'] as array<int>), size(cast(v['nested']['c'] as
array<int>)) from var_nested_load_conflict order by k;
+
+select cast(v['nested']['a'] as array<string>), size(cast(v['nested']['a'] as
array<string>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['b'] as array<string>), size(cast(v['nested']['b'] as
array<string>)) from var_nested_load_conflict order by k;
+select cast(v['nested']['c'] as array<string>), size(cast(v['nested']['c'] as
array<string>)) from var_nested_load_conflict order by k;
\ No newline at end of file
diff --git a/regression-test/suites/variant_p0/nested2.groovy
b/regression-test/suites/variant_p0/nested2.groovy
index 8d48fcfce9b..75d84a664c2 100644
--- a/regression-test/suites/variant_p0/nested2.groovy
+++ b/regression-test/suites/variant_p0/nested2.groovy
@@ -24,6 +24,7 @@ suite("variant_nested_type_conflict", "p0"){
sql "DROP TABLE IF EXISTS ${table_name}"
sql """set describe_extend_variant_column = true"""
+ sql """ set disable_variant_flatten_nested = false """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
diff --git
a/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
b/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
index 5c9d85fb8ed..a8b78bdd258 100644
---
a/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
+++
b/regression-test/suites/variant_p0/schema_change/test_double_write_when_schema_change.groovy
@@ -57,6 +57,7 @@ suite("double_write_schema_change_with_variant",
"nonConcurrent") {
def table_name = "github_events"
sql """DROP TABLE IF EXISTS ${table_name}"""
+ sql "set disable_variant_flatten_nested = false"
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]