This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new a5ddccb9b84 branch-4.1 : [Refactor](Variant) add NestedGroup path
metadata support (#62782)
a5ddccb9b84 is described below
commit a5ddccb9b842340e6fc2696f697dfc90dc222378
Author: lihangyu <[email protected]>
AuthorDate: Thu May 7 21:43:46 2026 +0800
branch-4.1 : [Refactor](Variant) add NestedGroup path metadata support
(#62782)
cherry-pick #62848
---
be/src/core/column/column_variant.cpp | 200 +++++-
be/src/core/column/column_variant.h | 6 +
be/src/core/data_type/data_type_variant.cpp | 5 +-
be/src/core/data_type/data_type_variant.h | 1 +
be/src/exec/common/variant_util.cpp | 212 +++++-
be/src/exprs/function/cast/cast_to_variant.h | 26 +-
be/src/exprs/function/function_variant_element.cpp | 30 +-
be/src/storage/compaction/compaction.cpp | 3 +
.../storage/rowset/vertical_beta_rowset_writer.h | 2 +-
be/src/storage/segment/column_writer.cpp | 21 +-
be/src/storage/segment/variant/nested_group_path.h | 32 +
.../segment/variant/variant_column_reader.cpp | 199 ++++--
.../segment/variant/variant_column_writer_impl.cpp | 4 +-
.../variant_doc_snpashot_compact_iterator.h | 2 +-
.../variant_streaming_compaction_writer.cpp | 21 +
be/src/storage/tablet/tablet_schema.h | 11 +
be/test/exec/common/schema_util_test.cpp | 97 ++-
.../segment/hierarchical_data_iterator_test.cpp | 2 +-
.../segment/variant_column_writer_reader_test.cpp | 707 +++++++++++++++------
.../java/org/apache/doris/catalog/ScalarType.java | 1 +
.../org/apache/doris/analysis/SearchPredicate.java | 44 --
.../apache/doris/common/util/PropertyAnalyzer.java | 4 +-
.../glue/translator/ExpressionTranslator.java | 22 +
.../apache/doris/nereids/types/VariantType.java | 9 +
.../java/org/apache/doris/catalog/TypeTest.java | 8 +
.../doris/nereids/parser/NereidsParserTest.java | 12 +
26 files changed, 1274 insertions(+), 407 deletions(-)
diff --git a/be/src/core/column/column_variant.cpp
b/be/src/core/column/column_variant.cpp
index 5ef2016d243..9938faada16 100644
--- a/be/src/core/column/column_variant.cpp
+++ b/be/src/core/column/column_variant.cpp
@@ -70,6 +70,8 @@
#include "storage/olap_common.h"
#include "util/defer_op.h"
#include "util/json/path_in_data.h"
+#include "util/jsonb_document.h"
+#include "util/jsonb_document_cast.h"
#include "util/jsonb_parser_simd.h"
#include "util/jsonb_utils.h"
#include "util/simd/bits.h"
@@ -395,17 +397,22 @@ void ColumnVariant::Subcolumn::insert_range_from(const
Subcolumn& src, size_t st
data.back()->insert_range_from(*column, from, n);
return;
}
- // When LCT is Array<Variant> (NG data) and the part is scalar, cast
- // would crash. Under DISCARD_SCALAR the scalar part becomes defaults.
+ // When LCT is Array<Variant> (NG data), only true scalar sources should
+ // be discarded under DISCARD_SCALAR. Mixed regular arrays such as
+ // [null, "plain_text", 123, {"k":"v"}] must still get a chance to cast
+ // into the NG-compatible array type during query-side merges.
if (is_nested_group_type(least_common_type.get())) {
- if (!config::variant_nested_group_discard_scalar_on_conflict) {
- throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
- "NestedGroup type conflict: cannot cast
scalar type {} to "
- "Array<Variant>",
- column_type->get_name());
+ const bool src_is_scalar = get_number_of_dimensions(*column_type)
== 0;
+ if (src_is_scalar) {
+ if (!config::variant_nested_group_discard_scalar_on_conflict) {
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+ "NestedGroup type conflict: cannot
cast scalar type "
+ "{} to Array<Variant>",
+ column_type->get_name());
+ }
+ data.back()->insert_many_defaults(n);
+ return;
}
- data.back()->insert_many_defaults(n);
- return;
}
/// If we need to insert large range, there is no sense to cut part of
column and cast it.
/// Casting of all column and inserting from it can be faster.
@@ -415,6 +422,11 @@ void ColumnVariant::Subcolumn::insert_range_from(const
Subcolumn& src, size_t st
Status st = variant_util::cast_column({column, column_type, ""},
least_common_type.get(),
&casted_column);
if (!st.ok()) {
+ if (is_nested_group_type(least_common_type.get()) &&
+ config::variant_nested_group_discard_scalar_on_conflict) {
+ data.back()->insert_many_defaults(n);
+ return;
+ }
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
st.to_string());
}
data.back()->insert_range_from(*casted_column, from, n);
@@ -424,6 +436,11 @@ void ColumnVariant::Subcolumn::insert_range_from(const
Subcolumn& src, size_t st
Status st = variant_util::cast_column({casted_column, column_type, ""},
least_common_type.get(),
&casted_column);
if (!st.ok()) {
+ if (is_nested_group_type(least_common_type.get()) &&
+ config::variant_nested_group_discard_scalar_on_conflict) {
+ data.back()->insert_many_defaults(n);
+ return;
+ }
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
st.to_string());
}
data.back()->insert_range_from(*casted_column, 0, n);
@@ -679,6 +696,9 @@
ColumnVariant::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_, bo
base_type_id = base_type->get_primitive_type();
}
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count)
+ : ColumnVariant(max_subcolumns_count, false) {}
+
ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode)
: is_nullable(true),
num_rows(0),
@@ -688,6 +708,11 @@ ColumnVariant::ColumnVariant(int32_t max_subcolumns_count,
bool enable_doc_mode)
ENABLE_CHECK_CONSISTENCY(this);
}
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, DataTypePtr
root_type,
+ MutableColumnPtr&& root_column)
+ : ColumnVariant(max_subcolumns_count, false, std::move(root_type),
std::move(root_column)) {
+}
+
ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode,
DataTypePtr root_type, MutableColumnPtr&&
root_column)
: is_nullable(true),
@@ -701,6 +726,9 @@ ColumnVariant::ColumnVariant(int32_t max_subcolumns_count,
bool enable_doc_mode,
ENABLE_CHECK_CONSISTENCY(this);
}
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, Subcolumns&&
subcolumns_)
+ : ColumnVariant(max_subcolumns_count, false, std::move(subcolumns_)) {}
+
ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode,
Subcolumns&& subcolumns_)
: is_nullable(true),
@@ -718,6 +746,9 @@ ColumnVariant::ColumnVariant(int32_t max_subcolumns_count,
bool enable_doc_mode,
serialized_doc_value_column->resize(num_rows);
}
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, size_t size)
+ : ColumnVariant(max_subcolumns_count, false, size) {}
+
ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode, size_t size)
: is_nullable(true),
num_rows(0),
@@ -823,9 +854,12 @@ void ColumnVariant::insert_from(const IColumn& src, size_t
n) {
const auto* src_v = check_and_get_column<ColumnVariant>(src);
ENABLE_CHECK_CONSISTENCY(src_v);
ENABLE_CHECK_CONSISTENCY(this);
- // doc mode fast path: both sides root-only, direct copy root + sparse + doc_value
- if (_enable_doc_mode) {
- DCHECK(src_v->_enable_doc_mode) << "dst is doc mode but src is not";
+ // Preserve the original root-only copy path for ordinary variant columns.
+ // Reconstructing through try_insert() loses sparse/doc_value structure for
+ // mixed-shape rows and nested-group data.
+ if (src_v->get_subcolumns().size() == 1 && get_subcolumns().size() == 1) {
+ DCHECK(_enable_doc_mode == src_v->_enable_doc_mode)
+ << "root-only variant copy requires matching doc mode";
FieldWithDataType field;
src_v->subcolumns.get_root()->data.get(n, field);
subcolumns.get_mutable_root()->data.insert(field);
@@ -1643,36 +1677,132 @@ struct Prefix {
bool root_is_first_flag = true;
};
-// skip empty nested json:
-// 1. nested array with only nulls, eg. [null. null],todo: think a better way
to deal distinguish array null value and real null value.
-// 2. type is nothing
-bool ColumnVariant::Subcolumn::is_empty_nested(size_t row) const {
- PrimitiveType base_type_id = least_common_type.get_base_type_id();
- const DataTypePtr& type = least_common_type.get();
- if (type->get_primitive_type() == PrimitiveType::TYPE_ARRAY) {
- // check if it is empty nested json array, then skip
- FieldWithDataType field;
- get(row, field);
- if (field.field.get_type() == PrimitiveType::TYPE_ARRAY) {
- const auto& array = field.field.get<TYPE_ARRAY>();
- bool only_nulls_inside = true;
- for (const auto& elem : array) {
- if (elem.get_type() != PrimitiveType::TYPE_NULL) {
- only_nulls_inside = false;
- break;
- }
+enum class NestedJsonSkipKind {
+ kVisible,
+ kNullOnlyArray,
+ kEmptyShellArray,
+ kInvalidType,
+};
+
+static bool is_semantically_empty_jsonb_value(const JsonbValue* value) {
+ if (value == nullptr || value->isNull()) {
+ return true;
+ }
+ if (value->isArray()) {
+ const auto* array = value->unpack<ArrayVal>();
+ for (auto it = array->begin(); it != array->end(); ++it) {
+ if (!is_semantically_empty_jsonb_value(&*it)) {
+ return false;
}
- // if only nulls then skip
- return only_nulls_inside;
}
+ return true;
}
- // skip nothing type
- if (base_type_id == PrimitiveType::INVALID_TYPE) {
+ if (value->isObject()) {
+ const auto* object = value->unpack<ObjectVal>();
+ for (auto it = object->begin(); it != object->end(); ++it) {
+ if (!is_semantically_empty_jsonb_value(it->value())) {
+ return false;
+ }
+ }
return true;
}
return false;
}
+static bool is_semantically_empty_nested_field(const Field& field) {
+ switch (field.get_type()) {
+ case PrimitiveType::TYPE_NULL:
+ return true;
+ case PrimitiveType::TYPE_ARRAY: {
+ const auto& array = field.get<TYPE_ARRAY>();
+ for (const auto& elem : array) {
+ if (!is_semantically_empty_nested_field(elem)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ case PrimitiveType::TYPE_VARIANT: {
+ const auto& object = field.get<TYPE_VARIANT>();
+ for (const auto& [_, value] : object) {
+ if (!is_semantically_empty_nested_field(value.field)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ case PrimitiveType::TYPE_JSONB: {
+ const auto& jsonb = field.get<TYPE_JSONB>();
+ const JsonbDocument* doc = nullptr;
+ Status st =
+ JsonbDocument::checkAndCreateDocument(jsonb.get_value(),
jsonb.get_size(), &doc);
+ if (!st.ok() || doc == nullptr) {
+ return false;
+ }
+ return is_semantically_empty_jsonb_value(doc->getValue());
+ }
+ default:
+ return false;
+ }
+}
+
+static NestedJsonSkipKind classify_nested_json_skip_kind(const Field& field,
+ PrimitiveType
base_type_id,
+ const DataTypePtr&
type) {
+ if (base_type_id == PrimitiveType::INVALID_TYPE) {
+ return NestedJsonSkipKind::kInvalidType;
+ }
+ if (type->get_primitive_type() != PrimitiveType::TYPE_ARRAY ||
+ field.get_type() != PrimitiveType::TYPE_ARRAY) {
+ return NestedJsonSkipKind::kVisible;
+ }
+
+ const auto& array = field.get<TYPE_ARRAY>();
+ bool only_nulls_inside = true;
+ bool only_empty_shells_inside = true;
+ for (const auto& elem : array) {
+ if (elem.get_type() != PrimitiveType::TYPE_NULL) {
+ only_nulls_inside = false;
+ }
+ if (!is_semantically_empty_nested_field(elem)) {
+ only_empty_shells_inside = false;
+ }
+ if (!only_nulls_inside && !only_empty_shells_inside) {
+ return NestedJsonSkipKind::kVisible;
+ }
+ }
+
+ if (only_nulls_inside) {
+ return NestedJsonSkipKind::kNullOnlyArray;
+ }
+ if (only_empty_shells_inside) {
+ return NestedJsonSkipKind::kEmptyShellArray;
+ }
+ return NestedJsonSkipKind::kVisible;
+}
+
+NestedJsonSkipKind get_nested_json_skip_kind(const ColumnVariant::Subcolumn&
subcolumn,
+ size_t row) {
+ FieldWithDataType field;
+ subcolumn.get(row, field);
+ return classify_nested_json_skip_kind(field.field,
subcolumn.get_least_common_base_type_id(),
+ subcolumn.get_least_common_type());
+}
+
+// Skip nested JSON during row serialization when it carries no visible payload.
+// For non-root nested subcolumns we ignore array payloads that recursively serialize to
+// nothing, e.g. [], [null], [{}], or [{"L2": []}].
+bool ColumnVariant::Subcolumn::is_empty_nested(size_t row) const {
+ return get_nested_json_skip_kind(*this, row) !=
NestedJsonSkipKind::kVisible;
+}
+
+// Root array visibility keeps the historical semantics: skip only null-like arrays and
+// INVALID_TYPE. Empty object arrays such as [{}] must remain visible at the root level.
+bool ColumnVariant::Subcolumn::is_empty_nested_root_value(size_t row) const {
+ NestedJsonSkipKind kind = get_nested_json_skip_kind(*this, row);
+ return kind == NestedJsonSkipKind::kNullOnlyArray || kind ==
NestedJsonSkipKind::kInvalidType;
+}
+
bool ColumnVariant::is_visible_root_value(size_t nrow) const {
if (is_null_root()) {
return false;
@@ -1685,7 +1815,7 @@ bool ColumnVariant::is_visible_root_value(size_t nrow)
const {
// for top level array we should also use field to check if it is empty
if (root->data.least_common_type.get_type_id() ==
PrimitiveType::TYPE_ARRAY) {
// nested field which field is Array
- return !root->data.is_empty_nested(nrow);
+ return !root->data.is_empty_nested_root_value(nrow);
}
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
diff --git a/be/src/core/column/column_variant.h
b/be/src/core/column/column_variant.h
index 9b7d84df525..bac4b5fbd1a 100644
--- a/be/src/core/column/column_variant.h
+++ b/be/src/core/column/column_variant.h
@@ -200,6 +200,7 @@ public:
friend class ColumnVariant;
bool is_empty_nested(size_t row) const;
+ bool is_empty_nested_root_value(size_t row) const;
void resize(size_t n);
@@ -301,14 +302,19 @@ public:
private:
friend class COWHelper<IColumn, ColumnVariant>;
// always create root: data type nothing
+ explicit ColumnVariant(int32_t max_subcolumns_count);
explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
// always create root: data type nothing
+ explicit ColumnVariant(int32_t max_subcolumns_count, size_t size);
explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
size_t size);
+ explicit ColumnVariant(int32_t max_subcolumns_count, DataTypePtr root_type,
+ MutableColumnPtr&& root_column);
explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
DataTypePtr root_type, MutableColumnPtr&&
root_column);
+ explicit ColumnVariant(int32_t max_subcolumns_count, Subcolumns&&
subcolumns_);
explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
Subcolumns&& subcolumns_);
diff --git a/be/src/core/data_type/data_type_variant.cpp
b/be/src/core/data_type/data_type_variant.cpp
index 8a434d3b7f5..3a8ff4ce716 100644
--- a/be/src/core/data_type/data_type_variant.cpp
+++ b/be/src/core/data_type/data_type_variant.cpp
@@ -46,6 +46,9 @@ class IColumn;
namespace doris {
#include "common/compile_check_begin.h"
+DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count)
+ : DataTypeVariant(max_subcolumns_count, false) {}
+
DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count, bool
enable_doc_mode)
: _max_subcolumns_count(max_subcolumns_count),
_enable_doc_mode(enable_doc_mode) {
name = fmt::format("Variant(max subcolumns count = {}, enable doc mode =
{})",
@@ -239,4 +242,4 @@ MutableColumnPtr DataTypeVariant::create_column() const {
return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
}
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/core/data_type/data_type_variant.h
b/be/src/core/data_type/data_type_variant.h
index d4353b9c858..a55f43b82c3 100644
--- a/be/src/core/data_type/data_type_variant.h
+++ b/be/src/core/data_type/data_type_variant.h
@@ -55,6 +55,7 @@ public:
static constexpr PrimitiveType PType = TYPE_VARIANT;
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_VARIANT; }
DataTypeVariant() = default;
+ DataTypeVariant(int32_t max_subcolumns_count);
DataTypeVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
String do_get_name() const override { return name; }
const std::string get_family_name() const override { return "Variant"; }
diff --git a/be/src/exec/common/variant_util.cpp
b/be/src/exec/common/variant_util.cpp
index bad21fab488..d2f650e03c6 100644
--- a/be/src/exec/common/variant_util.cpp
+++ b/be/src/exec/common/variant_util.cpp
@@ -90,6 +90,7 @@
#include "storage/rowset/rowset.h"
#include "storage/rowset/rowset_fwd.h"
#include "storage/segment/segment_loader.h"
+#include "storage/segment/variant/nested_group_path.h"
#include "storage/segment/variant/variant_column_reader.h"
#include "storage/segment/variant/variant_column_writer_impl.h"
#include "storage/tablet/tablet.h"
@@ -104,6 +105,131 @@
namespace doris::variant_util {
#include "common/compile_check_begin.h"
+bool is_regular_ng_compaction_subpath(const PathInData& path) {
+ const std::string& relative_path = path.get_path();
+ return !relative_path.empty() && !path.has_nested_part() &&
+ !segment_v2::contains_nested_group_marker(relative_path) &&
+ !segment_v2::is_root_nested_group_path(relative_path) &&
+ relative_path != SPARSE_COLUMN_PATH &&
+ relative_path.find(DOC_VALUE_COLUMN_PATH) == std::string::npos;
+}
+
+bool should_keep_existing_ng_compaction_subcolumn(const TabletColumn& column) {
+ if (!column.is_extracted_column()) {
+ return false;
+ }
+ return
is_regular_ng_compaction_subpath(column.path_info_ptr()->copy_pop_front());
+}
+
+TabletIndexes clone_tablet_indexes(const std::vector<const TabletIndex*>&
indexes) {
+ TabletIndexes cloned_indexes;
+ cloned_indexes.reserve(indexes.size());
+ for (const auto* index : indexes) {
+ cloned_indexes.emplace_back(std::make_shared<TabletIndex>(*index));
+ }
+ return cloned_indexes;
+}
+
+void keep_existing_ng_compaction_subcolumn(const TabletSchemaSPtr&
source_schema,
+ const TabletColumnPtr&
extracted_column,
+ TabletSchemaSPtr& output_schema,
+ TabletSchema::PathsSetInfo&
paths_set_info) {
+ DCHECK(extracted_column->is_extracted_column());
+ DCHECK(should_keep_existing_ng_compaction_subcolumn(*extracted_column));
+
+ auto relative_path = extracted_column->path_info_ptr()->copy_pop_front();
+ const std::string path = relative_path.get_path();
+ output_schema->append_column(*extracted_column);
+
+ auto indexes =
clone_tablet_indexes(source_schema->inverted_indexs(*extracted_column));
+ if (extracted_column->path_info_ptr()->get_is_typed()) {
+ TabletSchema::SubColumnInfo sub_column_info {.column =
*extracted_column,
+ .indexes =
std::move(indexes)};
+ paths_set_info.typed_path_set.emplace(path,
std::move(sub_column_info));
+ return;
+ }
+
+ paths_set_info.sub_path_set.emplace(path);
+ if (!indexes.empty()) {
+ paths_set_info.subcolumn_indexes.emplace(path, std::move(indexes));
+ }
+}
+
+using ExistingNgCompactionSubcolumns = std::unordered_map<int32_t,
std::vector<TabletColumnPtr>>;
+
+struct NestedGroupCompactionMaterializationPlan {
+ std::vector<TabletColumnPtr> preserved_regular_subcolumns;
+ std::unordered_set<PathInData, PathInData::Hash>
materialized_regular_paths;
+ PathToDataTypes additional_regular_path_to_data_types;
+};
+
+bool should_preserve_existing_ng_compaction_subcolumns(
+ const TabletColumnPtr& column,
+ const std::unordered_map<int32_t, VariantExtendedInfo>&
uid_to_variant_extended_info) {
+ const auto info_it =
uid_to_variant_extended_info.find(column->unique_id());
+ return column->variant_enable_nested_group() ||
+ (info_it != uid_to_variant_extended_info.end() &&
info_it->second.has_nested_group);
+}
+
+std::unordered_set<int32_t> collect_ng_compaction_root_uids(
+ const TabletSchemaSPtr& target,
+ const std::unordered_map<int32_t, VariantExtendedInfo>&
uid_to_variant_extended_info) {
+ std::unordered_set<int32_t> root_uids;
+ for (const TabletColumnPtr& column : target->columns()) {
+ if (column->is_variant_type() &&
should_preserve_existing_ng_compaction_subcolumns(
+ column,
uid_to_variant_extended_info)) {
+ root_uids.insert(column->unique_id());
+ }
+ }
+ return root_uids;
+}
+
+ExistingNgCompactionSubcolumns collect_existing_ng_compaction_subcolumns(
+ const TabletSchemaSPtr& target, const std::unordered_set<int32_t>&
ng_root_uids) {
+ ExistingNgCompactionSubcolumns uid_to_existing_subcolumns;
+ for (const TabletColumnPtr& column : target->columns()) {
+ if (!column->is_extracted_column() ||
!ng_root_uids.contains(column->parent_unique_id()) ||
+ !should_keep_existing_ng_compaction_subcolumn(*column)) {
+ continue;
+ }
+
uid_to_existing_subcolumns[column->parent_unique_id()].push_back(column);
+ }
+ return uid_to_existing_subcolumns;
+}
+
+NestedGroupCompactionMaterializationPlan
build_nested_group_compaction_materialization_plan(
+ const std::vector<TabletColumnPtr>& existing_subcolumns,
+ const VariantExtendedInfo& extended_info) {
+ NestedGroupCompactionMaterializationPlan plan;
+ plan.preserved_regular_subcolumns = existing_subcolumns;
+ for (const auto& column : existing_subcolumns) {
+
plan.materialized_regular_paths.emplace(column->path_info_ptr()->copy_pop_front());
+ }
+ for (const auto& [path, data_types] : extended_info.path_to_data_types) {
+ if (!is_regular_ng_compaction_subpath(path) ||
+ plan.materialized_regular_paths.contains(path)) {
+ continue;
+ }
+ plan.materialized_regular_paths.emplace(path);
+ plan.additional_regular_path_to_data_types.emplace(path, data_types);
+ }
+ return plan;
+}
+
+void append_nested_group_compaction_columns(const TabletSchemaSPtr& target,
+ const TabletColumnPtr& column,
+ const
NestedGroupCompactionMaterializationPlan& plan,
+ TabletSchemaSPtr& output_schema,
+ TabletSchema::PathsSetInfo&
paths_set_info) {
+ for (const auto& existing_column : plan.preserved_regular_subcolumns) {
+ keep_existing_ng_compaction_subcolumn(target, existing_column,
output_schema,
+ paths_set_info);
+ }
+ VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
+ paths_set_info, column, target,
plan.additional_regular_path_to_data_types,
+ output_schema);
+}
+
size_t get_number_of_dimensions(const IDataType& type) {
if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
return type_array->get_number_of_dimensions();
@@ -702,9 +828,9 @@ Status
VariantCompactionUtil::aggregate_variant_extended_info(
if (!column->is_variant_type()) {
continue;
}
+ auto& extended_info =
(*uid_to_variant_extended_info)[column->unique_id()];
if (column->variant_enable_nested_group()) {
-
(*uid_to_variant_extended_info)[column->unique_id()].has_nested_group = true;
- continue;
+ extended_info.has_nested_group = true;
}
for (const auto& segment : segment_cache.get_segments()) {
std::shared_ptr<ColumnReader> column_reader;
@@ -723,30 +849,29 @@ Status
VariantCompactionUtil::aggregate_variant_extended_info(
const auto* source_stats = variant_column_reader->get_stats();
CHECK(source_stats);
- // 1. agg path -> stats
- for (const auto& [path, size] :
source_stats->sparse_column_non_null_size) {
- (*uid_to_variant_extended_info)[column->unique_id()]
- .path_to_none_null_values[path] += size;
-
(*uid_to_variant_extended_info)[column->unique_id()].sparse_paths.emplace(path);
- }
+ if (!column->variant_enable_nested_group()) {
+ // NG roots still need type metadata for regular subpaths such as `v.owner`,
+ // but their compaction schema should not be driven by flat path stats.
+ for (const auto& [path, size] :
source_stats->sparse_column_non_null_size) {
+ extended_info.path_to_none_null_values[path] += size;
+ extended_info.sparse_paths.emplace(path);
+ }
- for (const auto& [path, size] :
source_stats->subcolumns_non_null_size) {
- (*uid_to_variant_extended_info)[column->unique_id()]
- .path_to_none_null_values[path] += size;
+ for (const auto& [path, size] :
source_stats->subcolumns_non_null_size) {
+ extended_info.path_to_none_null_values[path] += size;
+ }
}
//2. agg path -> schema
- auto& paths_types =
-
(*uid_to_variant_extended_info)[column->unique_id()].path_to_data_types;
- variant_column_reader->get_subcolumns_types(&paths_types);
+
variant_column_reader->get_subcolumns_types(&extended_info.path_to_data_types);
// 3. extract typed paths
- auto& typed_paths =
(*uid_to_variant_extended_info)[column->unique_id()].typed_paths;
- variant_column_reader->get_typed_paths(&typed_paths);
+ variant_column_reader->get_typed_paths(&extended_info.typed_paths);
// 4. extract nested paths
- auto& nested_paths =
(*uid_to_variant_extended_info)[column->unique_id()].nested_paths;
- variant_column_reader->get_nested_paths(&nested_paths);
+ if (!column->variant_enable_nested_group()) {
+
variant_column_reader->get_nested_paths(&extended_info.nested_paths);
+ }
}
}
return Status::OK();
@@ -1028,7 +1153,7 @@ void
VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
DataTypePtr data_type;
get_least_supertype_jsonb(data_types, &data_type);
auto column_name = parent_column->name_lower_case() + "." +
path.get_path();
- auto column_path = PathInData(column_name);
+ auto column_path = PathInData(column_name, path.get_is_typed());
TabletColumn sub_column =
get_column_by_type(data_type, column_name,
ExtraInfo {.unique_id = -1,
@@ -1037,7 +1162,14 @@ void
VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
inherit_column_attributes(*parent_column, sub_column);
TabletIndexes sub_column_indexes;
inherit_index(parent_indexes, sub_column_indexes, sub_column);
- paths_set_info.subcolumn_indexes.emplace(path.get_path(),
std::move(sub_column_indexes));
+ if (path.get_is_typed()) {
+ TabletSchema::SubColumnInfo sub_column_info {.column = sub_column,
+ .indexes =
std::move(sub_column_indexes)};
+ paths_set_info.typed_path_set.emplace(path.get_path(),
std::move(sub_column_info));
+ } else {
+ paths_set_info.subcolumn_indexes.emplace(path.get_path(),
+
std::move(sub_column_indexes));
+ }
output_schema->append_column(sub_column);
VLOG_DEBUG << "append sub column " << path.get_path() << " data type "
<< data_type->get_name();
@@ -1069,6 +1201,9 @@ Status
VariantCompactionUtil::get_extended_compaction_schema(
TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
output_schema->shawdow_copy_without_columns(*target);
std::unordered_map<int32_t, TabletSchema::PathsSetInfo>
uid_to_paths_set_info;
+ const auto ng_root_uids = collect_ng_compaction_root_uids(target,
uid_to_variant_extended_info);
+ const auto uid_to_existing_ng_subcolumns =
+ collect_existing_ng_compaction_subcolumns(target, ng_root_uids);
for (const TabletColumnPtr& column : target->columns()) {
if (!column->is_extracted_column()) {
output_schema->append_column(*column);
@@ -1083,6 +1218,20 @@ Status
VariantCompactionUtil::get_extended_compaction_schema(
const VariantExtendedInfo& extended_info = info_it ==
uid_to_variant_extended_info.end()
?
empty_extended_info
: info_it->second;
+ auto& paths_set_info = uid_to_paths_set_info[column->unique_id()];
+ if (ng_root_uids.contains(column->unique_id())) {
+ const auto plan =
build_nested_group_compaction_materialization_plan(
+ uid_to_existing_ng_subcolumns.contains(column->unique_id())
+ ?
uid_to_existing_ng_subcolumns.at(column->unique_id())
+ : std::vector<TabletColumnPtr> {},
+ extended_info);
+ append_nested_group_compaction_columns(target, column, plan,
output_schema,
+ paths_set_info);
+ LOG(INFO) << "Variant column uid=" << column->unique_id()
+ << " keeps nested-group root with regular extracted
columns in compaction "
+ "schema";
+ continue;
+ }
if (!should_check_variant_path_stats(*column)) {
VLOG_DEBUG << "skip extended schema compaction for variant uid="
<< column->unique_id()
<< " because the column disables variant path stats";
@@ -1108,29 +1257,28 @@ Status
VariantCompactionUtil::get_extended_compaction_schema(
// 1. append typed columns
RETURN_IF_ERROR(get_compaction_typed_columns(target,
extended_info.typed_paths, column,
- output_schema,
-
uid_to_paths_set_info[column->unique_id()]));
+ output_schema,
paths_set_info));
// 2. append nested columns
- RETURN_IF_ERROR(get_compaction_nested_columns(
- extended_info.nested_paths, extended_info.path_to_data_types,
column, output_schema,
- uid_to_paths_set_info[column->unique_id()]));
+
RETURN_IF_ERROR(get_compaction_nested_columns(extended_info.nested_paths,
+
extended_info.path_to_data_types, column,
+ output_schema,
paths_set_info));
// 3. get the subpaths
get_subpaths(column->variant_max_subcolumns_count(),
extended_info.path_to_none_null_values,
- uid_to_paths_set_info[column->unique_id()]);
+ paths_set_info);
// 4. append subcolumns
if (column->variant_max_subcolumns_count() > 0 ||
!column->get_sub_columns().empty()) {
- get_compaction_subcolumns_from_subpaths(
- uid_to_paths_set_info[column->unique_id()], column, target,
- extended_info.path_to_data_types,
extended_info.sparse_paths, output_schema);
+ get_compaction_subcolumns_from_subpaths(paths_set_info, column,
target,
+
extended_info.path_to_data_types,
+
extended_info.sparse_paths, output_schema);
}
// variant_max_subcolumns_count == 0 and no typed paths materialized
// it means that all subcolumns are materialized, may be from old data
else {
- get_compaction_subcolumns_from_data_types(
- uid_to_paths_set_info[column->unique_id()], column, target,
- extended_info.path_to_data_types, output_schema);
+ get_compaction_subcolumns_from_data_types(paths_set_info, column,
target,
+
extended_info.path_to_data_types,
+ output_schema);
}
// append sparse column(s)
diff --git a/be/src/exprs/function/cast/cast_to_variant.h
b/be/src/exprs/function/cast/cast_to_variant.h
index db10d8cd372..3c0e2c1ee92 100644
--- a/be/src/exprs/function/cast/cast_to_variant.h
+++ b/be/src/exprs/function/cast/cast_to_variant.h
@@ -17,6 +17,7 @@
#pragma once
+#include "core/column/column_nullable.h"
#include "core/data_type/data_type_variant.h"
#include "exprs/function/cast/cast_base.h"
#include "exprs/function/cast/cast_to_string.h"
@@ -26,12 +27,15 @@ namespace doris::CastWrapper {
// shared implementation for casting from variant to arbitrary non-nullable
target type
inline Status cast_from_variant_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t
result,
- size_t input_rows_count,
- const NullMap::value_type* /*null_map*/,
+ size_t input_rows_count, const
NullMap::value_type* null_map,
const DataTypePtr& data_type_to) {
const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
const auto& col_from = col_with_type_and_name.column;
- const auto& variant = assert_cast<const ColumnVariant&>(*col_from);
+ const IColumn* variant_column = col_from.get();
+ if (const auto* nullable =
check_and_get_column<ColumnNullable>(*variant_column)) {
+ variant_column = &nullable->get_nested_column();
+ }
+ const auto& variant = assert_cast<const ColumnVariant&>(*variant_column);
ColumnPtr col_to = data_type_to->create_column();
if (!variant.is_finalized()) {
@@ -73,10 +77,11 @@ inline Status cast_from_variant_impl(FunctionContext*
context, Block& block,
col_to = make_nullable(col_to, true);
} else {
col_to = tmp_block.get_by_position(1).column;
- // Note: here we should return the nullable result column
- col_to = wrap_in_nullable(
- col_to, Block({{nested, nested_from_type, ""}, {col_to,
data_type_to, ""}}),
- {0}, input_rows_count);
+ col_to = wrap_in_nullable(col_to,
+ Block({{nested, nested_from_type, ""},
+ {col_from,
col_with_type_and_name.type, ""},
+ {col_to, data_type_to, ""}}),
+ {0, 1}, input_rows_count);
}
} else {
if (variant.only_have_default_values()) {
@@ -100,6 +105,13 @@ inline Status cast_from_variant_impl(FunctionContext*
context, Block& block,
}
}
+ if (null_map == nullptr) {
+ if (const auto* nullable_result =
check_and_get_column<ColumnNullable>(*col_to);
+ nullable_result != nullptr && !nullable_result->has_null()) {
+ col_to = nullable_result->get_nested_column_ptr();
+ }
+ }
+
if (col_to->size() != input_rows_count) {
return Status::InternalError("Unmatched row count {}, expected {}",
col_to->size(),
input_rows_count);
diff --git a/be/src/exprs/function/function_variant_element.cpp
b/be/src/exprs/function/function_variant_element.cpp
index 27963fe30b9..1ae2a4bad82 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -207,13 +207,17 @@ private:
assert_cast<const
ColumnString&>(doc_value_data_map.get_keys());
const auto& src_doc_value_data_values =
assert_cast<const
ColumnString&>(doc_value_data_map.get_values());
- // Write extracted data into target's doc_value column (not sparse) to
preserve
- // doc mode invariant: doc_mode columns must not have sparse data.
- auto& doc_value_offsets =
-
assert_cast<ColumnMap&>(*target_ptr->get_doc_value_column()->assume_mutable())
+ const bool write_to_doc_value = target_ptr->enable_doc_mode();
+ // Ordinary Variant extraction keeps the selected prefix in sparse
data, matching the
+ // source branch behavior. Only doc-mode columns keep extracted data
in doc_value.
+ auto& extracted_offsets =
+ assert_cast<ColumnMap&>(*(write_to_doc_value ?
target_ptr->get_doc_value_column()
+ :
target_ptr->get_sparse_column())
+ ->assume_mutable())
.get_offsets();
- auto [doc_value_paths, doc_value_values] =
- target_ptr->get_doc_value_data_paths_and_values();
+ auto [extracted_paths, extracted_values] =
+ write_to_doc_value ?
target_ptr->get_doc_value_data_paths_and_values()
+ :
target_ptr->get_sparse_data_paths_and_values();
StringRef prefix_ref(path.get_path());
std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
for (size_t i = 0; i != src_doc_value_data_offsets.size(); ++i) {
@@ -233,20 +237,24 @@ private:
continue;
}
std::string_view sub_path = *sub_path_optional;
- doc_value_paths->insert_data(sub_path.data(),
sub_path.size());
- doc_value_values->insert_from(src_doc_value_data_values,
lower_bound_index);
+ extracted_paths->insert_data(sub_path.data(),
sub_path.size());
+ extracted_values->insert_from(src_doc_value_data_values,
lower_bound_index);
} else {
root.deserialize_from_binary_column(&src_doc_value_data_values,
lower_bound_index);
}
}
- if (root.size() == doc_value_offsets.size()) {
+ if (root.size() == extracted_offsets.size()) {
root.insert_default();
}
- doc_value_offsets.push_back(doc_value_paths->size());
+ extracted_offsets.push_back(extracted_paths->size());
}
target_ptr->get_subcolumns().create_root(root);
-
target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size());
+ if (write_to_doc_value) {
+
target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size());
+ } else {
+
target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size());
+ }
target_ptr->set_num_rows(src_ptr->size());
}
diff --git a/be/src/storage/compaction/compaction.cpp
b/be/src/storage/compaction/compaction.cpp
index 5c092f8dc90..22434bc083a 100644
--- a/be/src/storage/compaction/compaction.cpp
+++ b/be/src/storage/compaction/compaction.cpp
@@ -264,6 +264,9 @@ Status Compaction::merge_input_rowsets() {
}
RowsetWriterContext ctx;
+ // Propagate input rowset readers into the rowset writer context before
the writer is created.
+ // Variant nested-group compaction uses this metadata to enable the
streaming writer path.
+ ctx.input_rs_readers = input_rs_readers;
RETURN_IF_ERROR(construct_output_rowset_writer(ctx));
// write merged rows to output rowset
diff --git a/be/src/storage/rowset/vertical_beta_rowset_writer.h
b/be/src/storage/rowset/vertical_beta_rowset_writer.h
index 5552fc55985..650cf273478 100644
--- a/be/src/storage/rowset/vertical_beta_rowset_writer.h
+++ b/be/src/storage/rowset/vertical_beta_rowset_writer.h
@@ -61,4 +61,4 @@ private:
size_t _total_key_group_rows = 0;
};
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/storage/segment/column_writer.cpp
b/be/src/storage/segment/column_writer.cpp
index 44864400048..7643ba8a48a 100644
--- a/be/src/storage/segment/column_writer.cpp
+++ b/be/src/storage/segment/column_writer.cpp
@@ -911,7 +911,8 @@ Status OffsetColumnWriter::append_data(const uint8_t** ptr,
size_t num_rows) {
while (remaining > 0) {
size_t num_written = remaining;
RETURN_IF_ERROR(append_data_in_current_page(ptr, &num_written));
- // _next_offset after append_data_in_current_page is the offset of
next data, which will used in finish_current_page() to set
next_array_item_ordinal
+ // Callers provide one extra tail offset after the written rows so the
page footer can
+ // store the next array item ordinal for the current page.
_next_offset = *(const uint64_t*)(*ptr);
remaining -= num_written;
@@ -1184,14 +1185,10 @@ Status ArrayColumnWriter::write_ordinal_index() {
}
Status ArrayColumnWriter::append_nulls(size_t num_rows) {
- size_t num_lengths = num_rows;
- const ordinal_t offset = _item_writer->get_next_rowid();
- while (num_lengths > 0) {
- // TODO llj bulk write
- const auto* offset_ptr = reinterpret_cast<const uint8_t*>(&offset);
- RETURN_IF_ERROR(_offset_writer->append_data(&offset_ptr, 1));
- --num_lengths;
- }
+ const UInt64 offset = cast_set<UInt64>(_item_writer->get_next_rowid());
+ std::vector<UInt64> offsets_data(num_rows + 1, offset);
+ const uint8_t* offsets_ptr = reinterpret_cast<const
uint8_t*>(offsets_data.data());
+ RETURN_IF_ERROR(_offset_writer->append_data(&offsets_ptr, num_rows));
return write_null_column(num_rows, true);
}
@@ -1326,9 +1323,9 @@ Status MapColumnWriter::append_nulls(size_t num_rows) {
for (auto& sub_writer : _kv_writers) {
RETURN_IF_ERROR(sub_writer->append_nulls(num_rows));
}
- const ordinal_t offset = _kv_writers[0]->get_next_rowid();
- std::vector<UInt8> offsets_data(num_rows, cast_set<uint8_t>(offset));
- const uint8_t* offsets_ptr = offsets_data.data();
+ const UInt64 offset = cast_set<UInt64>(_kv_writers[0]->get_next_rowid());
+ std::vector<UInt64> offsets_data(num_rows + 1, offset);
+ const uint8_t* offsets_ptr = reinterpret_cast<const
uint8_t*>(offsets_data.data());
RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
if (is_nullable()) {
diff --git a/be/src/storage/segment/variant/nested_group_path.h
b/be/src/storage/segment/variant/nested_group_path.h
index 8cf32790eba..6aaa116c164 100644
--- a/be/src/storage/segment/variant/nested_group_path.h
+++ b/be/src/storage/segment/variant/nested_group_path.h
@@ -65,6 +65,38 @@ inline std::string
strip_nested_group_marker(std::string_view path) {
return out;
}
+inline std::string strip_root_nested_group_path(std::string_view path) {
+ if (path == kRootNestedGroupPath) {
+ return {};
+ }
+ const std::string root_prefix = std::string(kRootNestedGroupPath) + ".";
+ if (path.starts_with(root_prefix)) {
+ return std::string(path.substr(root_prefix.size()));
+ }
+ return std::string(path);
+}
+
+inline std::string build_nested_group_logical_child_path(std::string_view
variant_name,
+ std::string_view
group_logical_path,
+ std::string_view
child_relative_path) {
+ std::string path(variant_name);
+ auto append_path = [&path](std::string_view part) {
+ if (part.empty()) {
+ return;
+ }
+ if (!path.empty()) {
+ path.push_back('.');
+ }
+ path.append(part);
+ };
+
+ const std::string group_path =
+
strip_root_nested_group_path(strip_nested_group_marker(group_logical_path));
+ append_path(group_path);
+ append_path(child_relative_path);
+ return path;
+}
+
inline std::string build_nested_group_offsets_column_name(std::string_view
variant_name,
std::string_view
full_path) {
std::string name;
diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp
b/be/src/storage/segment/variant/variant_column_reader.cpp
index 4b2343ba04c..f1250ab7004 100644
--- a/be/src/storage/segment/variant/variant_column_reader.cpp
+++ b/be/src/storage/segment/variant/variant_column_reader.cpp
@@ -67,6 +67,85 @@ bool is_compaction_or_checksum_reader(const
StorageReadOptions* opts) {
opts->io_ctx.reader_type ==
ReaderType::READER_CHECKSUM);
}
+// Nested-group whole/root-merge iterators dereference NestedGroupReader state
that is owned by
+// VariantColumnReader. Hold the owning reader until the iterator itself is
destroyed so query-time
+// iterator initialization cannot outlive the reader and hit a UAF.
+class ReaderOwnedColumnIterator final : public ColumnIterator {
+public:
+ ReaderOwnedColumnIterator(ColumnIteratorUPtr inner,
std::shared_ptr<ColumnReader> owner)
+ : _inner(std::move(inner)), _owner(std::move(owner)) {
+ DCHECK(_inner != nullptr);
+ set_column_name(_inner->column_name());
+ set_reading_flag(_inner->reading_flag());
+ }
+
+ Status init(const ColumnIteratorOptions& opts) override { return
_inner->init(opts); }
+
+ Status seek_to_ordinal(ordinal_t ord) override { return
_inner->seek_to_ordinal(ord); }
+
+ Status next_batch(size_t* n, MutableColumnPtr& dst, bool* has_null)
override {
+ return _inner->next_batch(n, dst, has_null);
+ }
+
+ Status next_batch_of_zone_map(size_t* n, MutableColumnPtr& dst) override {
+ return _inner->next_batch_of_zone_map(n, dst);
+ }
+
+ Status read_by_rowids(const rowid_t* rowids, const size_t count,
+ MutableColumnPtr& dst) override {
+ return _inner->read_by_rowids(rowids, count, dst);
+ }
+
+ ordinal_t get_current_ordinal() const override { return
_inner->get_current_ordinal(); }
+
+ Status get_row_ranges_by_zone_map(
+ const AndBlockColumnPredicate* col_predicates,
+ const std::vector<std::shared_ptr<const ColumnPredicate>>*
delete_predicates,
+ RowRanges* row_ranges) override {
+ return _inner->get_row_ranges_by_zone_map(col_predicates,
delete_predicates, row_ranges);
+ }
+
+ Status get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate*
col_predicates,
+ RowRanges* row_ranges) override {
+ return _inner->get_row_ranges_by_bloom_filter(col_predicates,
row_ranges);
+ }
+
+ Status get_row_ranges_by_dict(const AndBlockColumnPredicate*
col_predicates,
+ RowRanges* row_ranges) override {
+ return _inner->get_row_ranges_by_dict(col_predicates, row_ranges);
+ }
+
+ bool is_all_dict_encoding() const override { return
_inner->is_all_dict_encoding(); }
+
+ Status set_access_paths(const TColumnAccessPaths& all_access_paths,
+ const TColumnAccessPaths& predicate_access_paths)
override {
+ RETURN_IF_ERROR(_inner->set_access_paths(all_access_paths,
predicate_access_paths));
+ set_reading_flag(_inner->reading_flag());
+ return Status::OK();
+ }
+
+ void set_need_to_read() override {
+ _inner->set_need_to_read();
+ set_reading_flag(_inner->reading_flag());
+ }
+
+ void remove_pruned_sub_iterators() override {
_inner->remove_pruned_sub_iterators(); }
+
+ Status init_prefetcher(const SegmentPrefetchParams& params) override {
+ return _inner->init_prefetcher(params);
+ }
+
+ void collect_prefetchers(
+ std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
+ PrefetcherInitMethod init_method) override {
+ _inner->collect_prefetchers(prefetchers, init_method);
+ }
+
+private:
+ ColumnIteratorUPtr _inner;
+ std::shared_ptr<ColumnReader> _owner;
+};
+
} // namespace
const SubcolumnColumnMetaInfo::Node*
VariantColumnReader::get_subcolumn_meta_by_path(
@@ -328,13 +407,39 @@ Status VariantColumnReader::_build_read_plan_flat_leaves(
int32_t col_uid =
target_col.unique_id() >= 0 ? target_col.unique_id() :
target_col.parent_unique_id();
auto relative_path = target_col.path_info_ptr()->copy_pop_front();
+ const auto* root = _subcolumns_meta_info->get_root();
const auto* node =
target_col.has_path_info() ?
_subcolumns_meta_info->find_leaf(relative_path) : nullptr;
+
+ if (relative_path.empty()) {
+ plan->type = create_variant_type(target_col);
+ plan->relative_path = relative_path;
+ plan->needs_root_merge = _needs_root_nested_group_merge(relative_path);
+ if (_statistics->has_doc_value_column_non_null_size()) {
+ plan->kind = ReadKind::HIERARCHICAL_DOC;
+ plan->root = root;
+ return Status::OK();
+ }
+ plan->kind = ReadKind::ROOT_FLAT;
+ return Status::OK();
+ }
+
if (!relative_path.empty() && _can_use_nested_group_read_path() &&
_try_fill_nested_group_plan(plan, target_col, opts, col_uid,
relative_path)) {
return Status::OK();
}
+ const std::string dot_prefix = relative_path.get_path() + ".";
+ if (target_col.variant_enable_doc_mode() &&
+ _statistics->has_prefix_path_in_doc_value_column(dot_prefix)) {
+ plan->kind = ReadKind::HIERARCHICAL_DOC;
+ plan->type = create_variant_type(target_col);
+ plan->relative_path = relative_path;
+ plan->root = root;
+ plan->needs_root_merge = _needs_root_nested_group_merge(relative_path);
+ return Status::OK();
+ }
+
// compaction need to read flat leaves nodes data to prevent from
amplification
if (!node) {
// Handle sparse column reads in flat-leaf compaction.
@@ -387,7 +492,6 @@ Status VariantColumnReader::_build_read_plan_flat_leaves(
plan->relative_path = relative_path;
return Status::OK();
}
-
// If the path is typed, it means the path is not a sparse column, so
we can't read the sparse column
// even if the sparse column size is reached limit
bool existed_in_sparse_column =
@@ -413,14 +517,6 @@ Status VariantColumnReader::_build_read_plan_flat_leaves(
plan->relative_path = relative_path;
return Status::OK();
}
- if (relative_path.empty()) {
- // root path, use VariantRootColumnIterator
- plan->kind = ReadKind::ROOT_FLAT;
- plan->type = create_variant_type(target_col);
- plan->relative_path = relative_path;
- plan->needs_root_merge = _needs_root_nested_group_merge(relative_path);
- return Status::OK();
- }
VLOG_DEBUG << "new iterator: " << target_col.path_info_ptr()->get_path();
std::shared_ptr<ColumnReader> column_reader;
RETURN_IF_ERROR(column_reader_cache->get_path_column_reader(
@@ -779,19 +875,24 @@ Status VariantColumnReader::_build_read_plan(ReadPlan*
plan, const TabletColumn&
// which means the path maybe exist in sparse_column
bool exceeded_sparse_column_limit =
_is_exceeded_sparse_column_limit_unlocked();
+ const std::string dot_prefix = relative_path.get_path() + ".";
+ if (target_col.variant_enable_doc_mode() &&
+ _statistics->has_prefix_path_in_doc_value_column(dot_prefix)) {
+ plan->kind = ReadKind::HIERARCHICAL_DOC;
+ plan->type = create_variant_type(target_col);
+ plan->relative_path = relative_path;
+ plan->root = root;
+ plan->needs_root_merge = _needs_root_nested_group_merge(relative_path);
+ return Status::OK();
+ }
+
// Check if path is prefix, example sparse columns path: a.b.c, a.b.e,
access prefix: a.b.
// Or access root path
if (_has_prefix_path_unlocked(relative_path)) {
// Example {"b" : {"c":456,"e":7.111}}
// b.c is sparse column, b.e is subcolumn, so b is both the prefix of
sparse column and
- // subcolumn.
- // Doc mode: prefer extracting hierarchy from doc_value column to
preserve doc mode
- // invariant (root-only + doc_value). Non-doc mode: read from
subcolumns + sparse.
- if (target_col.variant_enable_doc_mode()) {
- plan->kind = ReadKind::HIERARCHICAL_DOC;
- } else {
- plan->kind = ReadKind::HIERARCHICAL;
- }
+ // subcolumn
+ plan->kind = ReadKind::HIERARCHICAL;
plan->type = create_variant_type(target_col);
plan->relative_path = relative_path;
plan->node = node;
@@ -830,10 +931,7 @@ Status VariantColumnReader::_build_read_plan(ReadPlan*
plan, const TabletColumn&
return Status::OK();
}
- const std::string dot_prefix = relative_path.get_path() + ".";
- bool has_prefix_in_doc_column =
- _statistics->has_prefix_path_in_doc_value_column(dot_prefix);
- if (has_prefix_in_doc_column) {
+ if (_statistics->has_prefix_path_in_doc_value_column(dot_prefix)) {
plan->kind = ReadKind::HIERARCHICAL_DOC;
plan->type = create_variant_type(target_col);
plan->relative_path = relative_path;
@@ -1015,8 +1113,16 @@ Status
VariantColumnReader::new_iterator(ColumnIteratorUPtr* iterator,
RETURN_IF_ERROR(_build_read_plan(&plan, *target_col, opt,
column_reader_cache,
binary_column_cache_ptr));
// Caller of this overload does not need the storage type; only iterator
is used.
- return _create_iterator_from_plan(iterator, plan, *target_col, opt,
column_reader_cache,
- binary_column_cache_ptr);
+ RETURN_IF_ERROR(_create_iterator_from_plan(iterator, plan, *target_col,
opt,
+ column_reader_cache,
binary_column_cache_ptr));
+ const bool needs_reader_owner = plan.needs_root_merge ||
+ plan.kind == ReadKind::NESTED_GROUP_WHOLE
||
+ plan.kind == ReadKind::NESTED_GROUP_CHILD;
+ if (needs_reader_owner) {
+ *iterator =
std::make_unique<ReaderOwnedColumnIterator>(std::move(*iterator),
+
shared_from_this());
+ }
+ return Status::OK();
}
Status VariantColumnReader::init(const ColumnReaderOptions& opts,
ColumnMetaAccessor* accessor,
@@ -1330,6 +1436,23 @@ TabletIndexes
VariantColumnReader::find_subcolumn_tablet_indexes(const TabletCol
TabletSchema::SubColumnInfo sub_column_info;
const auto& parent_index =
_tablet_schema->inverted_indexs(column.parent_unique_id());
auto relative_path = column.path_info_ptr()->copy_pop_front();
+ DataTypePtr index_data_type = data_type;
+
+ if (!relative_path.empty()) {
+ auto [found, group_chain, child_path] =
+ collect_nested_group_chain(relative_path.get_path());
+ (void)child_path;
+ if (found && !group_chain.empty()) {
+ // NestedGroup leaf readers store the flattened element type.
+ if (data_type->is_nullable()) {
+ auto base =
variant_util::get_base_type_of_array(remove_nullable(data_type));
+ index_data_type = base->is_nullable() ? base :
make_nullable(base);
+ } else {
+ index_data_type =
variant_util::get_base_type_of_array(data_type);
+ }
+ }
+ }
+
// if subcolumn has index, add index to _variant_subcolumns_indexes
if (variant_util::generate_sub_column_info(*_tablet_schema,
column.parent_unique_id(),
relative_path.get_path(),
&sub_column_info) &&
@@ -1338,35 +1461,11 @@ TabletIndexes
VariantColumnReader::find_subcolumn_tablet_indexes(const TabletCol
}
// Otherwise, inherit index from the VARIANT parent column.
- if (!parent_index.empty() && data_type->get_primitive_type() !=
PrimitiveType::TYPE_VARIANT &&
- data_type->get_primitive_type() != PrimitiveType::TYPE_MAP /*SPARSE
COLUMN*/) {
+ if (!parent_index.empty() &&
+ index_data_type->get_primitive_type() != PrimitiveType::TYPE_VARIANT &&
+ index_data_type->get_primitive_type() != PrimitiveType::TYPE_MAP
/*SPARSE COLUMN*/) {
// type in column maynot be real type, so use data_type to get the
real type
PathInData index_path {*column.path_info_ptr()};
- DataTypePtr index_data_type = data_type;
- if (!relative_path.empty()) {
- auto [nested_reader, _] =
find_nested_group_for_path(relative_path.get_path());
- const std::string root_path(kRootNestedGroupPath);
-
- if (nested_reader != nullptr) {
- const bool is_root_ng = nested_reader->array_path == root_path;
- if (!is_root_ng) {
- // Named NG — use variant-relative path (consistent with
write path)
- index_path = relative_path;
- } else {
- // $root NG — prefix path with __D0_root__
- index_path = PathInData(root_path + "." +
relative_path.get_path());
- }
-
- // Unwrap Nullable(Array(...)) → element type for NG subcolumns
- if (data_type->is_nullable()) {
- auto base =
variant_util::get_base_type_of_array(remove_nullable(data_type));
- index_data_type = base->is_nullable() ? base :
make_nullable(base);
- } else {
- index_data_type =
variant_util::get_base_type_of_array(data_type);
- }
- }
- // else: non-NG scalar field — keep index_path and index_data_type
unchanged
- }
TabletColumn target_column =
variant_util::get_column_by_type(index_data_type,
column.name(),
{.unique_id = -1,
diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
index f70d1565788..30f7e2b2a73 100644
--- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
+++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
@@ -1180,7 +1180,9 @@ bool
VariantColumnWriterImpl::_can_use_nested_group_streaming_compaction() const
}
Status VariantColumnWriterImpl::init() {
- if (_can_use_nested_group_streaming_compaction()) {
+ const bool can_use_streaming =
_can_use_nested_group_streaming_compaction();
+
+ if (can_use_streaming) {
_streaming_compaction_writer =
std::make_unique<VariantStreamingCompactionWriter>(
_opts, _tablet_column, _nested_group_provider.get(),
&_statistics);
return _streaming_compaction_writer->init();
diff --git
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
index 7f2d63a0457..6199c74eccc 100644
--- a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
+++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
@@ -68,4 +68,4 @@ private:
#include "common/compile_check_end.h"
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git
a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
index 0b993ac8a88..2ca6ceec0f8 100644
--- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
+++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
@@ -18,6 +18,7 @@
#include "storage/segment/variant/variant_streaming_compaction_writer.h"
#include <memory>
+#include <unordered_set>
#include "common/cast_set.h"
#include "core/column/column_nullable.h"
@@ -25,6 +26,7 @@
#include "exec/common/variant_util.h"
#include "storage/index/indexed_column_writer.h"
#include "storage/iterator/olap_data_convertor.h"
+#include "storage/rowset/rowset_writer_context.h"
#include "storage/segment/variant/variant_writer_helpers.h"
#include "storage/types.h"
@@ -64,7 +66,26 @@ Status VariantStreamingCompactionWriter::_init_root_writer()
{
Status VariantStreamingCompactionWriter::_init_regular_subcolumn_writers(int&
column_id) {
_streaming_regular_subcolumn_writers.clear();
+ const auto* path_set_info =
+
_opts.rowset_ctx->tablet_schema->try_path_set_info(_tablet_column->unique_id());
+ std::unordered_set<std::string> schema_regular_paths;
+ for (const auto& column : _opts.rowset_ctx->tablet_schema->columns()) {
+ if (!column->is_extracted_column() ||
+ column->parent_unique_id() != _tablet_column->unique_id()) {
+ continue;
+ }
+
schema_regular_paths.emplace(column->path_info_ptr()->copy_pop_front().get_path());
+ }
for (const auto& plan_entry : _streaming_plan.regular_subcolumns) {
+ const bool is_materialized_regular_path =
+ schema_regular_paths.contains(plan_entry.path) ||
+ (path_set_info != nullptr &&
+
path_set_info->contains_materialized_regular_path(plan_entry.path));
+ if (is_materialized_regular_path) {
+ // Compaction schema already decided to materialize this regular
path as a standalone
+ // extracted column. Reopening it here would duplicate the same
writer target.
+ continue;
+ }
TabletColumn tablet_column;
TabletIndexes subcolumn_indexes;
ColumnWriterOptions opts;
diff --git a/be/src/storage/tablet/tablet_schema.h
b/be/src/storage/tablet/tablet_schema.h
index 07fed88d2c4..63af03e8cf6 100644
--- a/be/src/storage/tablet/tablet_schema.h
+++ b/be/src/storage/tablet/tablet_schema.h
@@ -696,6 +696,12 @@ public:
std::unordered_map<std::string, TabletIndexes> subcolumn_indexes; //
subcolumns indexes
PathSet sub_path_set; //
extracted columns
PathSet sparse_path_set; //
sparse columns
+
+ // "Materialized regular path" means compaction chose to store this
path as a dedicated
+ // column in the schema, either typed or extracted, instead of
re-emitting it dynamically.
+ bool contains_materialized_regular_path(const std::string& path) const
{
+ return typed_path_set.contains(path) ||
sub_path_set.contains(path);
+ }
};
void set_path_set_info(std::unordered_map<int32_t, PathsSetInfo>&&
path_set_info_map) {
@@ -706,6 +712,11 @@ public:
return _path_set_info_map.at(unique_id);
}
+ const PathsSetInfo* try_path_set_info(int32_t unique_id) const {
+ auto it = _path_set_info_map.find(unique_id);
+ return it == _path_set_info_map.end() ? nullptr : &it->second;
+ }
+
bool need_record_variant_extended_schema() const { return
variant_max_subcolumns_count() == 0; }
int32_t variant_max_subcolumns_count() const {
diff --git a/be/test/exec/common/schema_util_test.cpp
b/be/test/exec/common/schema_util_test.cpp
index db13e17159d..63ba6452722 100644
--- a/be/test/exec/common/schema_util_test.cpp
+++ b/be/test/exec/common/schema_util_test.cpp
@@ -61,6 +61,21 @@ void construct_column(ColumnPB* column_pb, TabletIndexPB*
tablet_index, int64_t
tablet_index->add_col_unique_id(col_unique_id);
}
+void append_inverted_index_for_path(TabletSchemaSPtr schema, int64_t index_id,
+ const std::string& index_name, int32_t
col_unique_id,
+ const std::string& suffix_path) {
+ TabletIndexPB index_pb;
+ index_pb.set_index_id(index_id);
+ index_pb.set_index_name(index_name);
+ index_pb.set_index_type(IndexType::INVERTED);
+ index_pb.add_col_unique_id(col_unique_id);
+
+ TabletIndex index;
+ index.init_from_pb(index_pb);
+ index.set_escaped_escaped_index_suffix_path(suffix_path);
+ schema->append_index(std::move(index));
+}
+
void construct_subcolumn(TabletSchemaSPtr schema, const FieldType& type,
int32_t col_unique_id,
std::string_view path, std::vector<TabletColumn>*
subcolumns) {
TabletColumn subcol;
@@ -1178,6 +1193,53 @@ TEST_F(SchemaUtilTest, TestGetCompactionSchema) {
EXPECT_EQ(variant_col.get_sub_columns().size(), 0);
}
+TEST_F(SchemaUtilTest,
get_extended_compaction_schema_nested_group_preserves_typed_subcolumns) {
+ TabletColumn variant;
+ variant.set_unique_id(1);
+ variant.set_name("v1");
+ variant.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ variant.set_is_nullable(true);
+ variant.set_variant_enable_nested_group(true);
+ variant.set_variant_max_subcolumns_count(0);
+
+ TabletColumn typed_subcolumn;
+ typed_subcolumn.set_unique_id(-1);
+ typed_subcolumn.set_name("v1.owner");
+ typed_subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ typed_subcolumn.set_is_nullable(true);
+ typed_subcolumn.set_parent_unique_id(1);
+ typed_subcolumn.set_path_info(PathInData("v1.owner", true));
+
+ TabletSchemaSPtr target_schema = std::make_shared<TabletSchema>();
+ target_schema->append_column(variant);
+ target_schema->append_column(typed_subcolumn);
+ append_inverted_index_for_path(target_schema, 30000, "v1_owner_idx", 1,
"v1.owner");
+
+ auto source_indexes = target_schema->inverted_indexs(typed_subcolumn);
+ ASSERT_EQ(source_indexes.size(), 1);
+ EXPECT_EQ(source_indexes[0]->index_name(), "v1_owner_idx");
+
+ std::vector<RowsetSharedPtr> rowsets;
+ auto status =
variant_util::VariantCompactionUtil::get_extended_compaction_schema(
+ rowsets, target_schema);
+ ASSERT_TRUE(status.ok()) << status.to_string();
+
+ EXPECT_EQ(target_schema->num_columns(), 2);
+ const PathInData typed_path("v1.owner", true);
+ const auto typed_column_index = target_schema->field_index(typed_path);
+ ASSERT_NE(typed_column_index, -1);
+
+ const auto& preserved_typed_column =
target_schema->column(typed_column_index);
+ EXPECT_TRUE(preserved_typed_column.path_info_ptr()->get_is_typed());
+ EXPECT_EQ(preserved_typed_column.type(),
FieldType::OLAP_FIELD_TYPE_STRING);
+
+ const auto* path_set_info = target_schema->try_path_set_info(1);
+ ASSERT_NE(path_set_info, nullptr);
+ ASSERT_TRUE(path_set_info->typed_path_set.contains("owner"));
+ EXPECT_EQ(path_set_info->typed_path_set.at("owner").indexes.size(), 1);
+
EXPECT_EQ(path_set_info->typed_path_set.at("owner").indexes[0]->index_name(),
"v1_owner_idx");
+}
+
TEST_F(SchemaUtilTest, TestGetSortedSubcolumns) {
// Create test subcolumns
ColumnVariant::Subcolumns subcolumns;
@@ -1553,6 +1615,9 @@ TEST_F(SchemaUtilTest,
get_compaction_subcolumns_from_data_types) {
path_to_data_types[PathInData("a")] = {std::make_shared<DataTypeInt32>(),
std::make_shared<DataTypeInt64>()};
// -> BIGINT
path_to_data_types[PathInData("b")] =
{std::make_shared<DataTypeString>()}; // -> STRING
+ path_to_data_types[PathInData("typed", true)] =
{std::make_shared<DataTypeString>()};
+ path_to_data_types[PathInData("shared")] =
{std::make_shared<DataTypeInt32>()};
+ path_to_data_types[PathInData("shared", true)] =
{std::make_shared<DataTypeString>()};
TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
TabletSchema::PathsSetInfo paths_set_info;
@@ -1560,22 +1625,41 @@ TEST_F(SchemaUtilTest,
get_compaction_subcolumns_from_data_types) {
variant_util::VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
paths_set_info, parent_column, target, path_to_data_types,
output_schema);
- EXPECT_EQ(output_schema->num_columns(), 2);
- bool found_a = false, found_b = false;
+ EXPECT_EQ(output_schema->num_columns(), 5);
+ bool found_a = false, found_b = false, found_typed = false, found_shared =
false,
+ found_typed_shared = false;
for (const auto& col : output_schema->columns()) {
if (col->name() == "v1.a") {
found_a = true;
EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_BIGINT);
EXPECT_EQ(col->parent_unique_id(), 1);
EXPECT_EQ(col->path_info_ptr()->get_path(), "v1.a");
+ EXPECT_FALSE(col->path_info_ptr()->get_is_typed());
} else if (col->name() == "v1.b") {
found_b = true;
EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_STRING);
EXPECT_EQ(col->parent_unique_id(), 1);
EXPECT_EQ(col->path_info_ptr()->get_path(), "v1.b");
+ EXPECT_FALSE(col->path_info_ptr()->get_is_typed());
+ } else if (col->name() == "v1.typed") {
+ found_typed = true;
+ EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_STRING);
+ EXPECT_EQ(col->parent_unique_id(), 1);
+ EXPECT_EQ(col->path_info_ptr()->get_path(), "v1.typed");
+ EXPECT_TRUE(col->path_info_ptr()->get_is_typed());
+ } else if (col->name() == "v1.shared" &&
!col->path_info_ptr()->get_is_typed()) {
+ found_shared = true;
+ EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_INT);
+ EXPECT_EQ(col->parent_unique_id(), 1);
+ EXPECT_EQ(col->path_info_ptr()->get_path(), "v1.shared");
+ } else if (col->name() == "v1.shared" &&
col->path_info_ptr()->get_is_typed()) {
+ found_typed_shared = true;
+ EXPECT_EQ(col->type(), FieldType::OLAP_FIELD_TYPE_STRING);
+ EXPECT_EQ(col->parent_unique_id(), 1);
+ EXPECT_EQ(col->path_info_ptr()->get_path(), "v1.shared");
}
}
- EXPECT_TRUE(found_a && found_b);
+ EXPECT_TRUE(found_a && found_b && found_typed && found_shared &&
found_typed_shared);
ASSERT_TRUE(paths_set_info.subcolumn_indexes.find("a") !=
paths_set_info.subcolumn_indexes.end());
@@ -1583,6 +1667,13 @@ TEST_F(SchemaUtilTest,
get_compaction_subcolumns_from_data_types) {
paths_set_info.subcolumn_indexes.end());
EXPECT_EQ(paths_set_info.subcolumn_indexes["a"].size(), 1);
EXPECT_EQ(paths_set_info.subcolumn_indexes["b"].size(), 1);
+ EXPECT_FALSE(paths_set_info.subcolumn_indexes.contains("typed"));
+ ASSERT_TRUE(paths_set_info.typed_path_set.contains("typed"));
+ EXPECT_EQ(paths_set_info.typed_path_set.at("typed").indexes.size(), 1);
+ ASSERT_TRUE(paths_set_info.subcolumn_indexes.contains("shared"));
+ ASSERT_TRUE(paths_set_info.typed_path_set.contains("shared"));
+ EXPECT_EQ(paths_set_info.subcolumn_indexes.at("shared").size(), 1);
+ EXPECT_EQ(paths_set_info.typed_path_set.at("shared").indexes.size(), 1);
}
// Test has_different_structure_in_same_path function indirectly through
check_variant_has_no_ambiguous_paths
diff --git a/be/test/storage/segment/hierarchical_data_iterator_test.cpp
b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
index 614e73dff6a..7b51451ef7f 100644
--- a/be/test/storage/segment/hierarchical_data_iterator_test.cpp
+++ b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
@@ -134,4 +134,4 @@ TEST(HierarchicalDataIteratorTest,
ProcessSparseExtractSubpaths) {
EXPECT_EQ(read_offs[0], 0);
EXPECT_EQ(read_offs[1], 1);
-}
\ No newline at end of file
+}
diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp
b/be/test/storage/segment/variant_column_writer_reader_test.cpp
index 7ffcb46cdf8..3de2feb4b33 100644
--- a/be/test/storage/segment/variant_column_writer_reader_test.cpp
+++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp
@@ -20,6 +20,8 @@
#include <thread>
#include "common/config.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_string.h"
#include "core/data_type_serde/data_type_serde.h"
#include "gtest/gtest.h"
#include "storage/rowset/rowset_factory.h"
@@ -28,6 +30,7 @@
#include "storage/segment/column_reader_cache.h"
#include "storage/segment/variant/binary_column_extract_iterator.h"
#include "storage/segment/variant/hierarchical_data_iterator.h"
+#include "storage/segment/variant/nested_group_path.h"
#include "storage/segment/variant/nested_group_streaming_write_plan.h"
#include "storage/segment/variant/sparse_column_merge_iterator.h"
#include "storage/segment/variant/variant_column_reader.h"
@@ -71,13 +74,13 @@ static void construct_column(ColumnPB* column_pb, int32_t
col_unique_id,
}
}
-// static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t
index_id,
-// const std::string& index_name, int32_t
col_unique_id) {
-// tablet_index->set_index_id(index_id);
-// tablet_index->set_index_name(index_name);
-// tablet_index->set_index_type(IndexType::INVERTED);
-// tablet_index->add_col_unique_id(col_unique_id);
-// }
+static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t
index_id,
+ const std::string& index_name, int32_t
col_unique_id) {
+ tablet_index->set_index_id(index_id);
+ tablet_index->set_index_name(index_name);
+ tablet_index->set_index_type(IndexType::INVERTED);
+ tablet_index->add_col_unique_id(col_unique_id);
+}
// MockColumnReaderCache class for testing
class MockColumnReaderCache : public segment_v2::ColumnReaderCache {
@@ -145,9 +148,6 @@ static Status create_variant_root_reader(const
SegmentFooterPB& footer,
return Status::OK();
}
-static std::string expected_doc_bucket_json_from_full(const std::string&
full_json, int bucket_num,
- int bucket_index);
-
class VariantColumnWriterReaderTest : public testing::Test {
public:
void SetUp() override {
@@ -337,193 +337,6 @@ protected:
return Status::OK();
}
- void validate_doc_compact_writer_roundtrip(bool repeat_finish_write_calls)
{
- constexpr int kRows = 200;
- constexpr int kDocBuckets = 4;
- constexpr int kBucket = 0;
-
- TabletSchemaPB schema_pb;
- schema_pb.set_keys_type(KeysType::DUP_KEYS);
- construct_column(schema_pb.add_column(), 1, "VARIANT", "V1", 3, false,
false,
- /*variant_sparse_hash_shard_count=*/0,
- /*variant_enable_doc_mode=*/true,
- /*variant_doc_materialization_min_rows=*/0,
- /*variant_doc_hash_shard_count=*/kDocBuckets);
- _tablet_schema = std::make_shared<TabletSchema>();
- _tablet_schema->init_from_pb(schema_pb);
-
- TabletColumn parent_column = _tablet_schema->column(0);
- TabletColumn extracted_doc_bucket =
- variant_util::create_doc_value_column(parent_column, kBucket);
- extracted_doc_bucket.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
- extracted_doc_bucket.set_is_nullable(false);
- _tablet_schema->append_column(extracted_doc_bucket);
-
- TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
- _tablet_schema->set_external_segment_meta_used_default(false);
- tablet_meta->_tablet_id = 33000;
- _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
- EXPECT_TRUE(_tablet->init().ok());
-
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-
- io::FileWriterPtr file_writer;
- auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
- auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
- EXPECT_TRUE(st.ok()) << st.msg();
-
- SegmentFooterPB footer;
-
- RowsetWriterContext rowset_ctx;
- rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
- rowset_ctx.tablet_schema = _tablet_schema;
-
- ColumnWriterOptions root_opts;
- root_opts.meta = footer.add_columns();
- root_opts.compression_type = CompressionTypePB::LZ4;
- root_opts.file_writer = file_writer.get();
- root_opts.footer = &footer;
- root_opts.rowset_ctx = &rowset_ctx;
- _init_column_meta(root_opts.meta, 0, parent_column,
CompressionTypePB::LZ4);
-
- std::unique_ptr<ColumnWriter> root_writer;
- EXPECT_TRUE(ColumnWriter::create(root_opts, &parent_column,
file_writer.get(), &root_writer)
- .ok());
- EXPECT_TRUE(root_writer->init().ok());
-
- TabletColumn extracted_doc_bucket_col = _tablet_schema->column(1);
- ColumnWriterOptions doc_compact_opts = root_opts;
- doc_compact_opts.meta = footer.add_columns();
- _init_column_meta(doc_compact_opts.meta, 0, extracted_doc_bucket_col,
- CompressionTypePB::LZ4);
- std::unique_ptr<ColumnWriter> doc_compact_writer;
- EXPECT_TRUE(ColumnWriter::create(doc_compact_opts,
&extracted_doc_bucket_col,
- file_writer.get(),
&doc_compact_writer)
- .ok());
- EXPECT_TRUE(doc_compact_writer->init().ok());
-
- std::unordered_map<int, std::string> inserted_full_json;
- auto type_string = std::make_shared<DataTypeString>();
- auto full_json_column = type_string->create_column();
- auto* full_strings =
assert_cast<ColumnString*>(full_json_column.get());
- VariantUtil::fill_string_column_with_test_data(full_strings, kRows,
&inserted_full_json);
-
- std::unordered_map<int, std::string> expected_bucket_json;
- auto bucket_json_column = type_string->create_column();
- auto* bucket_strings =
assert_cast<ColumnString*>(bucket_json_column.get());
- for (int i = 0; i < kRows; ++i) {
- const std::string& full = inserted_full_json[i];
- std::string bucket_json =
- expected_doc_bucket_json_from_full(full, kDocBuckets,
kBucket);
- expected_bucket_json.emplace(i, bucket_json);
- bucket_strings->insert_data(bucket_json.data(),
bucket_json.size());
- }
-
- ParseConfig config;
- config.deprecated_enable_flatten_nested = false;
- config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
-
- MutableColumnPtr root_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
- variant_util::parse_json_to_variant(*root_variant, *full_strings,
config);
-
- MutableColumnPtr bucket_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
- variant_util::parse_json_to_variant(*bucket_variant, *bucket_strings,
config);
-
- auto root_data = std::make_unique<VariantColumnData>();
- root_data->column_data = root_variant.get();
- root_data->row_pos = 0;
- const auto* root_ptr = reinterpret_cast<const
uint8_t*>(root_data.get());
- EXPECT_TRUE(root_writer->append_data(&root_ptr, kRows).ok());
-
- auto bucket_data = std::make_unique<VariantColumnData>();
- bucket_data->column_data = bucket_variant.get();
- bucket_data->row_pos = 0;
- const auto* bucket_ptr = reinterpret_cast<const
uint8_t*>(bucket_data.get());
- EXPECT_TRUE(doc_compact_writer->append_data(&bucket_ptr, kRows).ok());
-
- EXPECT_TRUE(root_writer->finish().ok());
- EXPECT_TRUE(doc_compact_writer->finish().ok());
- if (repeat_finish_write_calls) {
- EXPECT_TRUE(doc_compact_writer->finish().ok());
- }
- EXPECT_TRUE(root_writer->write_data().ok());
- EXPECT_TRUE(doc_compact_writer->write_data().ok());
- if (repeat_finish_write_calls) {
- EXPECT_TRUE(doc_compact_writer->write_data().ok());
- EXPECT_TRUE(doc_compact_writer->finish().ok());
- }
- EXPECT_TRUE(root_writer->write_ordinal_index().ok());
- EXPECT_TRUE(doc_compact_writer->write_ordinal_index().ok());
- EXPECT_TRUE(file_writer->close().ok());
- footer.set_num_rows(kRows);
-
- io::FileReaderSPtr file_reader;
- st = io::global_local_filesystem()->open_file(file_path, &file_reader);
- EXPECT_TRUE(st.ok()) << st.msg();
- std::shared_ptr<ColumnReader> column_reader;
- st = create_variant_root_reader(footer, file_reader, _tablet_schema,
&column_reader);
- EXPECT_TRUE(st.ok()) << st.msg();
- auto* variant_column_reader =
assert_cast<VariantColumnReader*>(column_reader.get());
- EXPECT_TRUE(variant_column_reader != nullptr);
-
- bool checked_one_key = false;
- for (int j = 0; j < 10; ++j) {
- const std::string key = "key" + std::to_string(j);
- StringRef ref {key.data(), key.size()};
- if (variant_util::variant_binary_shard_of(ref, kDocBuckets) ==
- static_cast<uint32_t>(kBucket)) {
-
EXPECT_TRUE(variant_column_reader->get_subcolumn_meta_by_path(PathInData(key))
!=
- nullptr);
- checked_one_key = true;
- break;
- }
- }
- EXPECT_TRUE(checked_one_key);
-
- MockColumnReaderCache column_reader_cache(footer, file_reader,
_tablet_schema);
- StorageReadOptions storage_read_opts;
- storage_read_opts.io_ctx.reader_type =
ReaderType::READER_BASE_COMPACTION;
- storage_read_opts.tablet_schema = _tablet_schema;
- OlapReaderStatistics stats;
- storage_read_opts.stats = &stats;
-
- TabletColumn doc_bucket_map =
variant_util::create_doc_value_column(parent_column, kBucket);
- ColumnIteratorUPtr it;
- st = variant_column_reader->new_iterator(&it, &doc_bucket_map,
&storage_read_opts,
- &column_reader_cache);
- EXPECT_TRUE(st.ok()) << st.msg();
-
EXPECT_TRUE(dynamic_cast<segment_v2::VariantDocValueCompactIterator*>(it.get())
!= nullptr);
-
- ColumnIteratorOptions column_iter_opts;
- column_iter_opts.stats = &stats;
- column_iter_opts.file_reader = file_reader.get();
- st = it->init(column_iter_opts);
- EXPECT_TRUE(st.ok()) << st.msg();
-
- DataTypeSerDe::FormatOptions options;
- auto tz = cctz::utc_time_zone();
- options.timezone = &tz;
-
- MutableColumnPtr dst =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
- size_t nrows = kRows;
- st = it->seek_to_ordinal(0);
- EXPECT_TRUE(st.ok()) << st.msg();
- st = it->next_batch(&nrows, dst);
- EXPECT_TRUE(st.ok()) << st.msg();
- EXPECT_EQ(nrows, kRows);
-
- for (int i = 0; i < kRows; ++i) {
- std::string value;
-
assert_cast<ColumnVariant*>(dst.get())->serialize_one_row_to_string(i, &value,
options);
- EXPECT_EQ(value, expected_bucket_json[i]);
- }
-
-
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
- }
-
TabletSchemaSPtr _tablet_schema = nullptr;
StorageEngine* _engine_ref = nullptr;
std::unique_ptr<DataDir> _data_dir = nullptr;
@@ -1721,6 +1534,34 @@ TEST_F(VariantColumnWriterReaderTest,
test_read_doc_compact_from_doc_value_bucke
auto tz = cctz::utc_time_zone();
options.timezone = &tz;
+ ColumnIteratorUPtr root_it;
+ st = variant_column_reader->new_iterator(&root_it, &parent_column,
&storage_read_opts,
+ &column_reader_cache);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(dynamic_cast<HierarchicalDataIterator*>(root_it.get()) !=
nullptr);
+
+ ColumnIteratorOptions root_iter_opts;
+ root_iter_opts.stats = &stats;
+ root_iter_opts.file_reader = file_reader.get();
+ st = root_it->init(root_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ MutableColumnPtr root_dst =
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+ size_t root_nrows = kRows;
+ st = root_it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = root_it->next_batch(&root_nrows, root_dst);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(root_nrows, kRows);
+
+ for (int i = 0; i < kRows; ++i) {
+ std::string value;
+ assert_cast<ColumnVariant*>(root_dst.get())
+ ->serialize_one_row_to_string(i, &value, options);
+ EXPECT_EQ(value, inserted_jsonstr[i]);
+ }
+
// 6. Read and validate each doc value bucket column: should choose
ReadKind::DOC_COMPACT.
for (int bucket = 0; bucket < kDocBuckets; ++bucket) {
TabletColumn doc_bucket_col =
variant_util::create_doc_value_column(parent_column, bucket);
@@ -1756,12 +1597,198 @@ TEST_F(VariantColumnWriterReaderTest,
test_read_doc_compact_from_doc_value_bucke
}
TEST_F(VariantColumnWriterReaderTest,
test_write_doc_compact_writer_and_read_doc_compact) {
- validate_doc_compact_writer_roundtrip(false);
-}
+ constexpr int kRows = 200;
+ constexpr int kDocBuckets = 4;
+ constexpr int kBucket = 0;
-TEST_F(VariantColumnWriterReaderTest,
-
test_write_doc_compact_writer_finish_write_data_idempotent_and_read_doc_compact)
{
- validate_doc_compact_writer_roundtrip(true);
+ // 1. create tablet_schema: root variant is in doc mode; plus one
extracted doc bucket column
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+ construct_column(schema_pb.add_column(), 1, "VARIANT", "V1", 3, false,
false,
+ /*variant_sparse_hash_shard_count=*/0,
+ /*variant_enable_doc_mode=*/true,
+ /*variant_doc_materialization_min_rows=*/0,
+ /*variant_doc_hash_shard_count=*/kDocBuckets);
+ _tablet_schema = std::make_shared<TabletSchema>();
+ _tablet_schema->init_from_pb(schema_pb);
+
+ TabletColumn parent_column = _tablet_schema->column(0);
+ TabletColumn extracted_doc_bucket =
+ variant_util::create_doc_value_column(parent_column, kBucket);
+ // This matches VariantCompactionUtil::get_extended_compaction_schema
behavior:
+ // extracted doc bucket columns are represented as VARIANT to trigger
VariantDocCompactWriter.
+ extracted_doc_bucket.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ extracted_doc_bucket.set_is_nullable(false);
+ _tablet_schema->append_column(extracted_doc_bucket);
+
+ // 2. create tablet
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+ _tablet_schema->set_external_segment_meta_used_default(false);
+ tablet_meta->_tablet_id = 33000;
+ _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
+ EXPECT_TRUE(_tablet->init().ok());
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ // 3. create file_writer
+ io::FileWriterPtr file_writer;
+ auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+ auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ // 4. create column writers: root VariantColumnWriter + extracted
VariantDocCompactWriter
+ SegmentFooterPB footer;
+
+ RowsetWriterContext rowset_ctx;
+ rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+ rowset_ctx.tablet_schema = _tablet_schema;
+
+ ColumnWriterOptions root_opts;
+ root_opts.meta = footer.add_columns();
+ root_opts.compression_type = CompressionTypePB::LZ4;
+ root_opts.file_writer = file_writer.get();
+ root_opts.footer = &footer;
+ root_opts.rowset_ctx = &rowset_ctx;
+ _init_column_meta(root_opts.meta, 0, parent_column,
CompressionTypePB::LZ4);
+
+ std::unique_ptr<ColumnWriter> root_writer;
+ EXPECT_TRUE(
+ ColumnWriter::create(root_opts, &parent_column, file_writer.get(),
&root_writer).ok());
+ EXPECT_TRUE(root_writer->init().ok());
+
+ TabletColumn extracted_doc_bucket_col = _tablet_schema->column(1);
+ ColumnWriterOptions doc_compact_opts = root_opts;
+ doc_compact_opts.meta = footer.add_columns();
+ _init_column_meta(doc_compact_opts.meta, 0, extracted_doc_bucket_col,
CompressionTypePB::LZ4);
+ std::unique_ptr<ColumnWriter> doc_compact_writer;
+ EXPECT_TRUE(ColumnWriter::create(doc_compact_opts,
&extracted_doc_bucket_col, file_writer.get(),
+ &doc_compact_writer)
+ .ok());
+ EXPECT_TRUE(doc_compact_writer->init().ok());
+
+ // 5. build doc-value-only data:
+ // - root column uses the full JSON (doc values only is enough for this
test)
+ // - extracted doc bucket column uses bucket-filtered JSON so that doc
bucket data matches
+ // the bucket index expected by VariantDocCompactWriter.
+ std::unordered_map<int, std::string> inserted_full_json;
+ auto type_string = std::make_shared<DataTypeString>();
+ auto full_json_column = type_string->create_column();
+ auto* full_strings = assert_cast<ColumnString*>(full_json_column.get());
+ VariantUtil::fill_string_column_with_test_data(full_strings, kRows,
&inserted_full_json);
+
+ std::unordered_map<int, std::string> expected_bucket_json;
+ auto bucket_json_column = type_string->create_column();
+ auto* bucket_strings =
assert_cast<ColumnString*>(bucket_json_column.get());
+ for (int i = 0; i < kRows; ++i) {
+ const std::string& full = inserted_full_json[i];
+ std::string bucket_json = expected_doc_bucket_json_from_full(full,
kDocBuckets, kBucket);
+ expected_bucket_json.emplace(i, bucket_json);
+ bucket_strings->insert_data(bucket_json.data(), bucket_json.size());
+ }
+
+ ParseConfig config;
+ config.deprecated_enable_flatten_nested = false;
+ config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
+
+ MutableColumnPtr root_variant = ColumnVariant::create(
+ parent_column.variant_max_subcolumns_count(),
parent_column.variant_enable_doc_mode());
+ variant_util::parse_json_to_variant(*root_variant, *full_strings, config);
+
+ MutableColumnPtr bucket_variant = ColumnVariant::create(
+ parent_column.variant_max_subcolumns_count(),
parent_column.variant_enable_doc_mode());
+ variant_util::parse_json_to_variant(*bucket_variant, *bucket_strings,
config);
+
+ // 6. append and write
+ {
+ auto root_data = std::make_unique<VariantColumnData>();
+ root_data->column_data = root_variant.get();
+ root_data->row_pos = 0;
+ const auto* data = reinterpret_cast<const uint8_t*>(root_data.get());
+ EXPECT_TRUE(root_writer->append_data(&data, kRows).ok());
+ }
+ {
+ auto bucket_data = std::make_unique<VariantColumnData>();
+ bucket_data->column_data = bucket_variant.get();
+ bucket_data->row_pos = 0;
+ const auto* data = reinterpret_cast<const uint8_t*>(bucket_data.get());
+ EXPECT_TRUE(doc_compact_writer->append_data(&data, kRows).ok());
+ }
+
+ EXPECT_TRUE(root_writer->finish().ok());
+ EXPECT_TRUE(doc_compact_writer->finish().ok());
+ EXPECT_TRUE(root_writer->write_data().ok());
+ EXPECT_TRUE(doc_compact_writer->write_data().ok());
+ EXPECT_TRUE(root_writer->write_ordinal_index().ok());
+ EXPECT_TRUE(doc_compact_writer->write_ordinal_index().ok());
+ EXPECT_TRUE(file_writer->close().ok());
+ footer.set_num_rows(kRows);
+
+ // 7. open reader and validate:
+ // - doc bucket can be read via DOC_COMPACT iterator in flat-leaf
compaction mode
+ // - materialized leaf meta exists for at least one key in this bucket
+ io::FileReaderSPtr file_reader;
+ st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ std::shared_ptr<ColumnReader> column_reader;
+ st = create_variant_root_reader(footer, file_reader, _tablet_schema,
&column_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ auto* variant_column_reader =
assert_cast<VariantColumnReader*>(column_reader.get());
+ EXPECT_TRUE(variant_column_reader != nullptr);
+
+ bool checked_one_key = false;
+ for (int j = 0; j < 10; ++j) {
+ const std::string key = "key" + std::to_string(j);
+ StringRef ref {key.data(), key.size()};
+ if (variant_util::variant_binary_shard_of(ref, kDocBuckets) ==
+ static_cast<uint32_t>(kBucket)) {
+
EXPECT_TRUE(variant_column_reader->get_subcolumn_meta_by_path(PathInData(key))
!=
+ nullptr);
+ checked_one_key = true;
+ break;
+ }
+ }
+ EXPECT_TRUE(checked_one_key);
+
+ MockColumnReaderCache column_reader_cache(footer, file_reader,
_tablet_schema);
+ StorageReadOptions storage_read_opts;
+ storage_read_opts.io_ctx.reader_type = ReaderType::READER_BASE_COMPACTION;
+ storage_read_opts.tablet_schema = _tablet_schema;
+ OlapReaderStatistics stats;
+ storage_read_opts.stats = &stats;
+
+ TabletColumn doc_bucket_map =
variant_util::create_doc_value_column(parent_column, kBucket);
+ ColumnIteratorUPtr it;
+ st = variant_column_reader->new_iterator(&it, &doc_bucket_map,
&storage_read_opts,
+ &column_reader_cache);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
EXPECT_TRUE(dynamic_cast<segment_v2::VariantDocValueCompactIterator*>(it.get())
!= nullptr);
+
+ ColumnIteratorOptions column_iter_opts;
+ column_iter_opts.stats = &stats;
+ column_iter_opts.file_reader = file_reader.get();
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ DataTypeSerDe::FormatOptions options;
+ auto tz = cctz::utc_time_zone();
+ options.timezone = &tz;
+
+ MutableColumnPtr dst =
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+ size_t nrows = kRows;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, dst);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(nrows, kRows);
+
+ for (int i = 0; i < kRows; ++i) {
+ std::string value;
+ assert_cast<ColumnVariant*>(dst.get())->serialize_one_row_to_string(i,
&value, options);
+ EXPECT_EQ(value, expected_bucket_json[i]);
+ }
+
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
}
TEST_F(VariantColumnWriterReaderTest, test_doc_compact_sparse_write_array_gap)
{
@@ -1838,8 +1865,8 @@ TEST_F(VariantColumnWriterReaderTest,
test_doc_compact_sparse_write_array_gap) {
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
- MutableColumnPtr bucket_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
+ MutableColumnPtr bucket_variant = ColumnVariant::create(
+ parent_column.variant_max_subcolumns_count(),
parent_column.variant_enable_doc_mode());
variant_util::parse_json_to_variant(*bucket_variant, *strings, parse_cfg);
auto bucket_data = std::make_unique<VariantColumnData>();
@@ -1939,8 +1966,8 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_doc_sparse_write_array_gap_and_
parse_cfg.deprecated_enable_flatten_nested = false;
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
- MutableColumnPtr variant_column =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
+ MutableColumnPtr variant_column = ColumnVariant::create(
+ parent_column.variant_max_subcolumns_count(),
parent_column.variant_enable_doc_mode());
variant_util::parse_json_to_variant(*variant_column, *strings, parse_cfg);
auto variant_data = std::make_unique<VariantColumnData>();
@@ -2303,6 +2330,274 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_sub_index) {
EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
}
+TEST_F(VariantColumnWriterReaderTest,
test_find_subcolumn_tablet_indexes_inherits_full_path) {
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+ construct_column(schema_pb.add_column(), 1, "VARIANT", "v", 10, false);
+ _tablet_schema = std::make_shared<TabletSchema>();
+ _tablet_schema->init_from_pb(schema_pb);
+
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+ _tablet_schema->set_external_segment_meta_used_default(false);
+ tablet_meta->_tablet_id = 10001;
+ _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
+ ASSERT_TRUE(_tablet->init().ok());
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ io::FileWriterPtr file_writer;
+ auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+ auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
+ ASSERT_TRUE(st.ok()) << st.msg();
+
+ SegmentFooterPB footer;
+ ColumnWriterOptions opts;
+ opts.meta = footer.add_columns();
+ opts.compression_type = CompressionTypePB::LZ4;
+ opts.file_writer = file_writer.get();
+ opts.footer = &footer;
+ RowsetWriterContext rowset_ctx;
+ rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+ rowset_ctx.tablet_schema = _tablet_schema;
+ opts.rowset_ctx = &rowset_ctx;
+ TabletColumn column = _tablet_schema->column(0);
+ _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+
+ std::unique_ptr<ColumnWriter> writer;
+ ASSERT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(),
&writer).ok());
+ ASSERT_TRUE(writer->init().ok());
+ ASSERT_TRUE(append_json_batch(writer.get(), {R"({"a": "x"})"}).ok());
+ ASSERT_TRUE(writer->finish().ok());
+ ASSERT_TRUE(writer->write_data().ok());
+ ASSERT_TRUE(writer->write_ordinal_index().ok());
+ ASSERT_TRUE(writer->write_zone_map().ok());
+ ASSERT_TRUE(file_writer->close().ok());
+ footer.set_num_rows(1);
+
+ TabletIndexPB index_pb;
+ construct_tablet_index(&index_pb, 10001, "idx_v", column.unique_id());
+ TabletIndex parent_index;
+ parent_index.init_from_pb(index_pb);
+ _tablet_schema->append_index(std::move(parent_index));
+
+ io::FileReaderSPtr file_reader;
+ st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+ ASSERT_TRUE(st.ok()) << st.msg();
+ std::shared_ptr<segment_v2::ColumnReader> column_reader;
+ st = create_variant_root_reader(footer, file_reader, _tablet_schema,
&column_reader);
+ ASSERT_TRUE(st.ok()) << st.msg();
+ auto* variant_reader =
assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
+ ASSERT_NE(variant_reader, nullptr);
+
+ TabletColumn subcolumn;
+ subcolumn.set_name("v.a");
+ subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ subcolumn.set_parent_unique_id(column.unique_id());
+ subcolumn.set_path_info(PathInData("v.a"));
+ subcolumn.set_is_nullable(true);
+
+ auto indexes = variant_reader->find_subcolumn_tablet_indexes(
+ subcolumn, std::make_shared<DataTypeString>());
+ ASSERT_EQ(indexes.size(), 1);
+ EXPECT_EQ(indexes[0]->index_id(), 10001);
+ EXPECT_EQ(indexes[0]->get_index_suffix(), "v%2Ea");
+ EXPECT_NE(indexes[0]->get_index_suffix(), "a");
+}
+
+TEST_F(VariantColumnWriterReaderTest,
test_nested_group_logical_index_path_uses_variant_root) {
+ EXPECT_EQ(segment_v2::build_nested_group_logical_child_path("v", "arr",
"x"), "v.arr.x");
+ EXPECT_EQ(segment_v2::build_nested_group_logical_child_path("v",
"arr.inner", "z"),
+ "v.arr.inner.z");
+ EXPECT_EQ(segment_v2::build_nested_group_logical_child_path(
+ "v", std::string(segment_v2::kRootNestedGroupPath), "x"),
+ "v.x");
+ EXPECT_EQ(segment_v2::build_nested_group_logical_child_path(
+ "v", std::string(segment_v2::kRootNestedGroupPath) +
".inner", "z"),
+ "v.inner.z");
+}
+
+TEST_F(VariantColumnWriterReaderTest,
test_find_subcolumn_tablet_indexes_branch_coverage) {
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+ construct_column(schema_pb.add_column(), 1, "VARIANT", "v", 10, false);
+ _tablet_schema = std::make_shared<TabletSchema>();
+ _tablet_schema->init_from_pb(schema_pb);
+
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+ _tablet_schema->set_external_segment_meta_used_default(false);
+ tablet_meta->_tablet_id = 10002;
+ _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
+ ASSERT_TRUE(_tablet->init().ok());
+
ASSERT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+
ASSERT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ io::FileWriterPtr file_writer;
+ auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+ auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
+ ASSERT_TRUE(st.ok()) << st.msg();
+
+ SegmentFooterPB footer;
+ ColumnWriterOptions opts;
+ opts.meta = footer.add_columns();
+ opts.compression_type = CompressionTypePB::LZ4;
+ opts.file_writer = file_writer.get();
+ opts.footer = &footer;
+ RowsetWriterContext rowset_ctx;
+ rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+ rowset_ctx.tablet_schema = _tablet_schema;
+ opts.rowset_ctx = &rowset_ctx;
+ TabletColumn root_column = _tablet_schema->column(0);
+ _init_column_meta(opts.meta, 0, root_column, CompressionTypePB::LZ4);
+
+ std::unique_ptr<ColumnWriter> writer;
+ ASSERT_TRUE(ColumnWriter::create(opts, &root_column, file_writer.get(),
&writer).ok());
+ ASSERT_TRUE(writer->init().ok());
+ ASSERT_TRUE(append_json_batch(writer.get(), {R"({"a": "x", "own":
"y"})"}).ok());
+ ASSERT_TRUE(writer->finish().ok());
+ ASSERT_TRUE(writer->write_data().ok());
+ ASSERT_TRUE(writer->write_ordinal_index().ok());
+ ASSERT_TRUE(writer->write_zone_map().ok());
+ ASSERT_TRUE(file_writer->close().ok());
+ footer.set_num_rows(1);
+
+ io::FileReaderSPtr file_reader;
+ st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+ ASSERT_TRUE(st.ok()) << st.msg();
+ std::shared_ptr<segment_v2::ColumnReader> column_reader;
+ st = create_variant_root_reader(footer, file_reader, _tablet_schema,
&column_reader);
+ ASSERT_TRUE(st.ok()) << st.msg();
+ auto* variant_reader =
assert_cast<segment_v2::VariantColumnReader*>(column_reader.get());
+ ASSERT_NE(variant_reader, nullptr);
+
+ const int32_t root_unique_id = root_column.unique_id();
+ auto make_subcolumn = [&](std::string name, FieldType type, std::string
path,
+ int32_t parent_unique_id) {
+ TabletColumn subcolumn;
+ subcolumn.set_name(std::move(name));
+ subcolumn.set_type(type);
+ subcolumn.set_parent_unique_id(parent_unique_id);
+ subcolumn.set_unique_id(2001);
+ subcolumn.set_path_info(PathInData(std::move(path)));
+ subcolumn.set_is_nullable(true);
+ return subcolumn;
+ };
+
+ {
+ auto no_parent_index = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.missing", FieldType::OLAP_FIELD_TYPE_STRING,
"v.missing",
+ root_unique_id),
+ std::make_shared<DataTypeString>());
+ EXPECT_TRUE(no_parent_index.empty());
+ }
+
+ TabletIndexPB parent_index_pb;
+ construct_tablet_index(&parent_index_pb, 10002, "idx_v",
root_column.unique_id());
+ TabletIndex parent_index;
+ parent_index.init_from_pb(parent_index_pb);
+ _tablet_schema->append_index(std::move(parent_index));
+
+ {
+ auto inherited = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.a", FieldType::OLAP_FIELD_TYPE_STRING,
"v.a", root_unique_id),
+ std::make_shared<DataTypeString>());
+ ASSERT_EQ(inherited.size(), 1);
+ EXPECT_EQ(inherited[0]->index_id(), 10002);
+ EXPECT_EQ(inherited[0]->get_index_suffix(), "v%2Ea");
+ }
+
+ {
+ auto plain_array = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.plainarr", FieldType::OLAP_FIELD_TYPE_ARRAY,
"v.plainarr",
+ root_unique_id),
+ std::make_shared<DataTypeNullable>(
+
std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())));
+ ASSERT_EQ(plain_array.size(), 1);
+ EXPECT_EQ(plain_array[0]->index_id(), 10002);
+ EXPECT_EQ(plain_array[0]->get_index_suffix(), "v%2Eplainarr");
+ EXPECT_NE(plain_array[0]->get_index_suffix(), "plainarr");
+ }
+
+ {
+ auto variant_type = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.object", FieldType::OLAP_FIELD_TYPE_VARIANT,
"v.object",
+ root_unique_id),
+ std::make_shared<DataTypeVariant>(10, false));
+ EXPECT_TRUE(variant_type.empty());
+ }
+
+ {
+ auto sparse_map_type = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.__sparse", FieldType::OLAP_FIELD_TYPE_MAP,
"v.__sparse",
+ root_unique_id),
+
std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(),
+
std::make_shared<DataTypeString>()));
+ EXPECT_TRUE(sparse_map_type.empty());
+ }
+
+ TabletColumn indexed_subcolumn;
+ indexed_subcolumn.set_name("own");
+ indexed_subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ _tablet_schema->mutable_column_by_uid(root_column.unique_id())
+ .add_sub_column(indexed_subcolumn);
+
+ TabletIndexPB own_index_pb;
+ construct_tablet_index(&own_index_pb, 10003, "idx_v_own",
root_column.unique_id());
+ (*own_index_pb.mutable_properties())["field_pattern"] = "own";
+ TabletIndex own_index;
+ own_index.init_from_pb(own_index_pb);
+ _tablet_schema->append_index(std::move(own_index));
+
+ {
+ auto own = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.own", FieldType::OLAP_FIELD_TYPE_STRING,
"v.own", root_unique_id),
+ std::make_shared<DataTypeString>());
+ ASSERT_EQ(own.size(), 1);
+ EXPECT_EQ(own[0]->index_id(), 10003);
+ EXPECT_EQ(own[0]->get_index_suffix(), "v%2Eown");
+ }
+
+ auto group_reader = std::make_unique<segment_v2::NestedGroupReader>();
+ group_reader->array_path = "arr";
+ group_reader->offsets_reader =
std::make_shared<segment_v2::ColumnReader>();
+ group_reader->child_readers.emplace("x", nullptr);
+ auto& nested_group_readers =
+
const_cast<segment_v2::NestedGroupReaders&>(variant_reader->get_nested_group_readers());
+ nested_group_readers.emplace("arr", std::move(group_reader));
+
+ {
+ auto nested = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.arr.x", FieldType::OLAP_FIELD_TYPE_ARRAY,
"v.arr.x",
+ root_unique_id),
+ std::make_shared<DataTypeNullable>(
+
std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())));
+ ASSERT_EQ(nested.size(), 1);
+ EXPECT_EQ(nested[0]->index_id(), 10002);
+ EXPECT_EQ(nested[0]->get_index_suffix(), "v%2Earr%2Ex");
+ EXPECT_NE(nested[0]->get_index_suffix(), "arr%2Ex");
+ }
+
+ auto nested_group_reader =
std::make_unique<segment_v2::NestedGroupReader>();
+ nested_group_reader->array_path = "inner";
+ nested_group_reader->offsets_reader =
std::make_shared<segment_v2::ColumnReader>();
+ nested_group_reader->child_readers.emplace("z", nullptr);
+ nested_group_readers.at("arr")->nested_group_readers.emplace("inner",
+
std::move(nested_group_reader));
+
+ {
+ auto nested = variant_reader->find_subcolumn_tablet_indexes(
+ make_subcolumn("v.arr.inner.z",
FieldType::OLAP_FIELD_TYPE_ARRAY, "v.arr.inner.z",
+ root_unique_id),
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeArray>(
+
std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()))));
+ ASSERT_EQ(nested.size(), 1);
+ EXPECT_EQ(nested[0]->index_id(), 10002);
+ EXPECT_EQ(nested[0]->get_index_suffix(), "v%2Earr%2Einner%2Ez");
+ EXPECT_NE(nested[0]->get_index_suffix(), "arr%2Einner%2Ez");
+ }
+}
+
TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) {
// 1. create tablet_schema
TabletSchemaPB schema_pb;
diff --git
a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
index dc73f384e14..219a7c2384f 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java
@@ -1073,4 +1073,5 @@ public class ScalarType extends Type {
}
return false; // Default to false for backward compatibility.
}
+
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
index 5b966103a33..2c10dc20bfa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
@@ -30,9 +30,6 @@ import org.apache.doris.thrift.TSearchFieldBinding;
import org.apache.doris.thrift.TSearchOccur;
import org.apache.doris.thrift.TSearchParam;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -44,8 +41,6 @@ import java.util.stream.IntStream;
* for BE VSearchExpr processing. This is only used during FE->BE translation.
*/
public class SearchPredicate extends Predicate {
- private static final Logger LOG =
LogManager.getLogger(SearchPredicate.class);
-
private final String dslString;
private final QsPlan qsPlan;
private final List<Index> fieldIndexes;
@@ -91,37 +86,6 @@ public class SearchPredicate extends Predicate {
protected void toThrift(TExprNode msg) {
msg.node_type = TExprNodeType.SEARCH_EXPR;
msg.setSearchParam(buildThriftParam());
-
- LOG.info("SearchPredicate.toThrift: dsl='{}', num_children_in_base={},
children_size={}",
- dslString, msg.num_children, this.children.size());
-
- // Print QsPlan details
- if (qsPlan != null) {
- LOG.info("SearchPredicate.toThrift: QsPlan fieldBindings.size={}",
- qsPlan.getFieldBindings() != null ?
qsPlan.getFieldBindings().size() : 0);
- if (qsPlan.getFieldBindings() != null) {
- for (int i = 0; i < qsPlan.getFieldBindings().size(); i++) {
- SearchDslParser.QsFieldBinding binding =
qsPlan.getFieldBindings().get(i);
- LOG.info("SearchPredicate.toThrift: binding[{}]
fieldName='{}', slotIndex={}",
- i, binding.getFieldName(), binding.getSlotIndex());
- }
- }
- }
-
- for (int i = 0; i < this.children.size(); i++) {
- Expr child = this.children.get(i);
- LOG.info("SearchPredicate.toThrift: child[{}] = {} (type={})",
- i, child.getClass().getSimpleName(), child.getType());
- if (child instanceof SlotRef) {
- SlotRef slotRef = (SlotRef) child;
- LOG.info("SearchPredicate.toThrift: SlotRef details -
column={}, isAnalyzed={}",
- slotRef.getColumnName(), slotRef.isAnalyzed());
- if (slotRef.isAnalyzed() && slotRef.getDesc() != null) {
- LOG.info("SearchPredicate.toThrift: SlotRef analyzed -
slotId={}",
- slotRef.getSlotId());
- }
- }
- }
}
@Override
@@ -171,9 +135,6 @@ public class SearchPredicate extends Predicate {
thriftBinding.setIsVariantSubcolumn(true);
thriftBinding.setParentFieldName(parentField);
thriftBinding.setSubcolumnPath(subcolumnPath);
-
- LOG.info("buildThriftParam: variant subcolumn field='{}',
parent='{}', subcolumn='{}'",
- fieldPath, parentField, subcolumnPath);
} else {
thriftBinding.setIsVariantSubcolumn(false);
}
@@ -185,10 +146,7 @@ public class SearchPredicate extends Predicate {
SlotRef slotRef = (SlotRef) this.children.get(i);
int actualSlotId = slotRef.getSlotId().asInt();
thriftBinding.setSlotIndex(actualSlotId);
- LOG.info("buildThriftParam: binding field='{}', actual
slotId={}",
- binding.getFieldName(), actualSlotId);
} else {
- LOG.warn("buildThriftParam: No corresponding SlotRef for field
'{}'", binding.getFieldName());
thriftBinding.setSlotIndex(i); // fallback to position
}
@@ -197,8 +155,6 @@ public class SearchPredicate extends Predicate {
Map<String, String> properties =
fieldIndexes.get(i).getProperties();
if (properties != null && !properties.isEmpty()) {
thriftBinding.setIndexProperties(properties);
- LOG.debug("buildThriftParam: field='{}'
index_properties={}",
- fieldPath, properties);
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
index d732b4fc999..923e1fcbc97 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java
@@ -2015,8 +2015,8 @@ public class PropertyAnalyzer {
String bucketNumStr =
properties.get(PROPERTIES_VARIANT_SPARSE_HASH_SHARD_COUNT);
try {
bucketNum = Integer.parseInt(bucketNumStr);
- if (bucketNum < 1 || bucketNum > 1024) {
- throw new
AnalysisException("variant_sparse_hash_shard_count must between 1 and 1024 ");
+ if (bucketNum < 0 || bucketNum > 1024) {
+ throw new
AnalysisException("variant_sparse_hash_shard_count must between 0 and 1024 ");
}
} catch (Exception e) {
throw new AnalysisException("variant_sparse_hash_shard_count
format error:" + e.getMessage());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
index 58c0ca9217d..64d1959fe70 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
@@ -213,6 +213,25 @@ public class ExpressionTranslator extends
DefaultExpressionVisitor<Expr, PlanTra
.orElse(null);
}
+ private Index getInvertedIndexFromTranslatedSlot(Expr translatedSlot) {
+ if (!(translatedSlot instanceof SlotRef)) {
+ return null;
+ }
+ SlotRef slotRef = (SlotRef) translatedSlot;
+ if (!slotRef.isAnalyzed() || slotRef.getDesc() == null ||
slotRef.getDesc().getParent() == null) {
+ return null;
+ }
+ if (!(slotRef.getDesc().getParent().getTable() instanceof OlapTable)) {
+ return null;
+ }
+ Column column = slotRef.getColumn();
+ if (column == null) {
+ return null;
+ }
+ OlapTable olapTbl = (OlapTable)
slotRef.getDesc().getParent().getTable();
+ return olapTbl.getInvertedIndex(column,
slotRef.getDesc().getSubColLables());
+ }
+
@Override
public Expr visitElementAt(ElementAt elementAt, PlanTranslatorContext
context) {
return visitScalarFunction(elementAt, context);
@@ -726,6 +745,9 @@ public class ExpressionTranslator extends
DefaultExpressionVisitor<Expr, PlanTra
}
}
}
+ if (invertedIndex == null) {
+ invertedIndex =
getInvertedIndexFromTranslatedSlot(translatedSlot);
+ }
fieldIndexes.add(invertedIndex);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
index fecf4d0b0cc..5ae317db058 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VariantType.java
@@ -184,6 +184,11 @@ public class VariantType extends PrimitiveType {
.append(String.valueOf(variantSparseHashShardCount))
.append("\"");
}
+ if (enableNestedGroup) {
+ sb.append(",");
+ sb.append("\"variant_enable_nested_group\" = \"")
+ .append(String.valueOf(enableNestedGroup)).append("\"");
+ }
sb.append(")>");
return sb.toString();
}
@@ -270,4 +275,8 @@ public class VariantType extends PrimitiveType {
public int getVariantDocShardCount() {
return variantDocShardCount;
}
+
+ public boolean getEnableNestedGroup() {
+ return enableNestedGroup;
+ }
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java
b/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java
index fe3e2b0bd0a..29353ae6c50 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TypeTest.java
@@ -128,6 +128,14 @@ public class TypeTest {
Assert.assertFalse(Type.matchExactType(v1, v4, false));
}
+ @Test
+ public void
testVariantToSqlDoesNotSerializeUnsupportedNestedGroupProperty() {
+ VariantType variantType = new VariantType(new ArrayList<>(), 0, false,
10000, 0,
+ false, 0L, 64, true);
+
+
Assert.assertFalse(variantType.toSql().contains("variant_enable_nested_group"));
+ }
+
// ===================== Mixed Nesting & Precision =====================
@Test
public void testArrayMapStructCombinationWithPrecision() {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
index 062ba47b1b5..27a217c6b16 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.StatementContext;
import org.apache.doris.nereids.analyzer.UnboundFunction;
import org.apache.doris.nereids.analyzer.UnboundOneRowRelation;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.NotSupportedException;
import org.apache.doris.nereids.exceptions.ParseException;
import org.apache.doris.nereids.exceptions.SyntaxParseException;
import org.apache.doris.nereids.glue.LogicalPlanAdapter;
@@ -1362,6 +1363,17 @@ public class NereidsParserTest extends ParserTestBase {
Assertions.assertTrue(createTableCommand.getCtasQuery().isPresent());
}
+ @Test
+ public void testCreateTableVariantNestedGroupPropertyIsRejected() {
+ NereidsParser parser = new NereidsParser();
+ String sql = "CREATE TABLE t_variant_ng (k1 INT, v VARIANT<PROPERTIES("
+ + "\"variant_enable_nested_group\" = \"true\")>) "
+ + "DISTRIBUTED BY HASH(k1) BUCKETS 1";
+ NotSupportedException exception =
+ Assertions.assertThrowsExactly(NotSupportedException.class, ()
-> parser.parseSingle(sql));
+
Assertions.assertTrue(exception.getMessage().contains("variant_enable_nested_group
is not supported now"));
+ }
+
@Test
public void testCreateViewWithoutAs() {
NereidsParser parser = new NereidsParser();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]