This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2f60973b98a [Refactor](variant) Promote enable_doc_mode from
column-level to type-level (#61895)
2f60973b98a is described below
commit 2f60973b98a7d794543fdec7c81d1d637698bd09
Author: Chenyang Sun <[email protected]>
AuthorDate: Tue Mar 31 14:33:00 2026 +0800
[Refactor](variant) Promote enable_doc_mode from column-level to type-level
(#61895)
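Migrate enable_doc_mode from TColumn (column-level) to
TScalarType/TColumnType (type-level) across FE schema, Thrift/Protobuf
IDL, BE data-type constructors, storage writers/readers, and compaction
paths. This ensures the parameter flows consistently through the type
system rather than being a column-level property.
On the BE side, ColumnVariant and DataTypeVariant now carry the flag:
DataTypeVariant::equals() compares it, ColumnVariant::check_consistency()
enforces the doc-mode invariants (root-only subcolumns, no sparse data),
and insert_from()/insert_range_from() take a doc-mode fast path.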
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [x] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
be/src/core/column/column_variant.cpp | 99 ++++++++++++++++------
be/src/core/column/column_variant.h | 16 ++--
be/src/core/data_type/data_type_factory.cpp | 26 +++---
be/src/core/data_type/data_type_variant.cpp | 19 ++---
be/src/core/data_type/data_type_variant.h | 6 +-
be/src/exec/common/variant_util.cpp | 13 ++-
be/src/exec/scan/olap_scanner.cpp | 5 +-
.../exprs/function/array/function_array_utils.cpp | 2 +-
be/src/exprs/function/array/function_array_utils.h | 2 +
be/src/exprs/function/cast/cast_to_variant.h | 7 +-
be/src/exprs/function/function_variant_element.cpp | 53 ++++++------
be/src/exprs/table_function/vexplode.cpp | 1 +
be/src/exprs/table_function/vexplode_v2.cpp | 1 +
be/src/storage/segment/segment.cpp | 7 +-
be/src/storage/segment/segment_writer.cpp | 1 +
.../segment/variant/hierarchical_data_iterator.cpp | 66 +++++++++++++--
.../segment/variant/hierarchical_data_iterator.h | 8 +-
.../segment/variant/variant_column_reader.cpp | 18 ++--
.../segment/variant/variant_column_writer_impl.cpp | 7 +-
.../variant_doc_snpashot_compact_iterator.h | 3 +-
.../variant_streaming_compaction_writer.cpp | 2 +-
be/src/storage/segment/vertical_segment_writer.cpp | 1 +
be/src/storage/tablet/tablet_meta.cpp | 4 +-
be/src/storage/tablet/tablet_schema.cpp | 4 +-
be/src/storage/tablet/tablet_schema.h | 3 +-
be/test/core/column/column_variant_test.cpp | 50 +++++------
be/test/exec/common/schema_util_rowset_test.cpp | 2 +-
be/test/exec/common/schema_util_test.cpp | 22 ++---
.../function/cast/function_variant_cast_test.cpp | 16 ++--
.../function/function_variant_element_test.cpp | 2 +-
.../segment/hierarchical_data_iterator_test.cpp | 4 +-
.../storage/segment/nested_group_provider_test.cpp | 2 +-
.../segment/variant_column_writer_reader_test.cpp | 43 +++++-----
be/test/storage/segment/variant_util_test.cpp | 16 ++--
be/test/testutil/variant_util.h | 10 +--
.../main/java/org/apache/doris/catalog/Column.java | 1 +
.../ExternalFileTableValuedFunction.java | 4 +-
.../java/org/apache/doris/catalog/VariantType.java | 2 +
gensrc/proto/data.proto | 1 +
gensrc/proto/segment_v2.proto | 1 +
gensrc/proto/types.proto | 1 +
gensrc/thrift/Descriptors.thrift | 2 +-
gensrc/thrift/Types.thrift | 2 +
43 files changed, 356 insertions(+), 199 deletions(-)
diff --git a/be/src/core/column/column_variant.cpp
b/be/src/core/column/column_variant.cpp
index 9dafa0c5230..5ef2016d243 100644
--- a/be/src/core/column/column_variant.cpp
+++ b/be/src/core/column/column_variant.cpp
@@ -480,7 +480,8 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&&
func) const {
return finalized_object.apply_for_columns(std::forward<Func>(func));
}
auto new_root = func(get_root())->assume_mutable();
- auto res = ColumnVariant::create(_max_subcolumns_count, get_root_type(),
std::move(new_root));
+ auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode,
get_root_type(),
+ std::move(new_root));
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
continue;
@@ -678,17 +679,21 @@
ColumnVariant::Subcolumn::LeastCommonType::LeastCommonType(DataTypePtr type_, bo
base_type_id = base_type->get_primitive_type();
}
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count)
- : is_nullable(true), num_rows(0),
_max_subcolumns_count(max_subcolumns_count) {
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode)
+ : is_nullable(true),
+ num_rows(0),
+ _max_subcolumns_count(max_subcolumns_count),
+ _enable_doc_mode(enable_doc_mode) {
subcolumns.create_root(Subcolumn(0, is_nullable, true /*root*/));
ENABLE_CHECK_CONSISTENCY(this);
}
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, DataTypePtr
root_type,
- MutableColumnPtr&& root_column)
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode,
+ DataTypePtr root_type, MutableColumnPtr&&
root_column)
: is_nullable(true),
num_rows(root_column->size()),
- _max_subcolumns_count(max_subcolumns_count) {
+ _max_subcolumns_count(max_subcolumns_count),
+ _enable_doc_mode(enable_doc_mode) {
subcolumns.create_root(
Subcolumn(std::move(root_column), root_type, is_nullable, true
/*root*/));
serialized_sparse_column->resize(num_rows);
@@ -696,11 +701,13 @@ ColumnVariant::ColumnVariant(int32_t
max_subcolumns_count, DataTypePtr root_type
ENABLE_CHECK_CONSISTENCY(this);
}
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, Subcolumns&&
subcolumns_)
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode,
+ Subcolumns&& subcolumns_)
: is_nullable(true),
subcolumns(std::move(subcolumns_)),
num_rows(subcolumns.empty() ? 0 :
(*subcolumns.begin())->data.size()),
- _max_subcolumns_count(max_subcolumns_count) {
+ _max_subcolumns_count(max_subcolumns_count),
+ _enable_doc_mode(enable_doc_mode) {
if (max_subcolumns_count && subcolumns_.size() > max_subcolumns_count + 1)
{
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
"unmatched max subcolumns count:, max
subcolumns count: {}, but "
@@ -711,8 +718,11 @@ ColumnVariant::ColumnVariant(int32_t max_subcolumns_count,
Subcolumns&& subcolum
serialized_doc_value_column->resize(num_rows);
}
-ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, size_t size)
- : is_nullable(true), num_rows(0),
_max_subcolumns_count(max_subcolumns_count) {
+ColumnVariant::ColumnVariant(int32_t max_subcolumns_count, bool
enable_doc_mode, size_t size)
+ : is_nullable(true),
+ num_rows(0),
+ _max_subcolumns_count(max_subcolumns_count),
+ _enable_doc_mode(enable_doc_mode) {
subcolumns.create_root(Subcolumn(0, is_nullable, true /*root*/));
insert_many_defaults(size);
ENABLE_CHECK_CONSISTENCY(this);
@@ -737,12 +747,30 @@ void ColumnVariant::check_consistency() const {
"unmatched doc snapshot column:, expeted rows:
{}, but meet: {}",
num_rows, serialized_doc_value_column->size());
}
- // const auto& offsets = serialized_doc_value_column_offsets();
- // size_t off = offsets[num_rows - 1];
- // if (off > 0 && subcolumns.size() != 1) {
- // throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
- // "doc snapshot column offsets is not empty,
but subcolumns size is not 1");
- // }
+ if (_enable_doc_mode && num_rows > 0) {
+ // doc mode invariants:
+ // - only root subcolumn (size == 1)
+ // - sparse column is empty
+ // - subcolumns and doc_value are mutually exclusive
+ if (subcolumns.size() != 1) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "doc mode: should only have root, but
subcolumns size is {}",
+ subcolumns.size());
+ }
+ const auto& sparse_offs = serialized_sparse_column_offsets();
+ if (sparse_offs[num_rows - 1] > 0) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "doc mode: should not have sparse data");
+ }
+ } else {
+ const auto& offsets = serialized_doc_value_column_offsets();
+ size_t off = offsets[num_rows - 1];
+ if (off > 0 && subcolumns.size() != 1) {
+ throw doris::Exception(
+ doris::ErrorCode::INTERNAL_ERROR,
+ "doc snapshot column offsets is not empty, but subcolumns
size is not 1");
+ }
+ }
}
size_t ColumnVariant::size() const {
@@ -752,7 +780,7 @@ size_t ColumnVariant::size() const {
MutableColumnPtr ColumnVariant::clone_resized(size_t new_size) const {
if (new_size == 0) {
- return ColumnVariant::create(_max_subcolumns_count);
+ return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
}
return apply_for_columns(
[&](const ColumnPtr column) { return
column->clone_resized(new_size); });
@@ -793,8 +821,11 @@ void ColumnVariant::for_each_subcolumn(ColumnCallback
callback) {
void ColumnVariant::insert_from(const IColumn& src, size_t n) {
const auto* src_v = check_and_get_column<ColumnVariant>(src);
- // only root, quick insert
- if (src_v->get_subcolumns().size() == 1 && get_subcolumns().size() == 1) {
+ ENABLE_CHECK_CONSISTENCY(src_v);
+ ENABLE_CHECK_CONSISTENCY(this);
+ // doc mode fast path: both sides root-only, direct copy root + sparse +
doc_value
+ if (_enable_doc_mode) {
+ DCHECK(src_v->_enable_doc_mode) << "dst is doc mode but src is not";
FieldWithDataType field;
src_v->subcolumns.get_root()->data.get(n, field);
subcolumns.get_mutable_root()->data.insert(field);
@@ -802,8 +833,9 @@ void ColumnVariant::insert_from(const IColumn& src, size_t
n) {
serialized_doc_value_column->insert_from(*src_v->get_doc_value_column(), n);
num_rows++;
} else {
- return try_insert((*src_v)[n]);
+ try_insert((*src_v)[n]);
}
+ ENABLE_CHECK_CONSISTENCY(this);
}
void ColumnVariant::try_insert(const Field& field) {
@@ -1126,6 +1158,20 @@ void ColumnVariant::insert_range_from(const IColumn&
src, size_t start, size_t l
ENABLE_CHECK_CONSISTENCY(&src_object);
ENABLE_CHECK_CONSISTENCY(this);
+ // doc mode fast path: both sides root-only, direct range copy root +
sparse + doc_value
+ if (_enable_doc_mode) {
+ DCHECK(src_object._enable_doc_mode) << "dst is doc mode but src is
not";
+ subcolumns.get_mutable_root()->data.insert_range_from(
+ src_object.subcolumns.get_root()->data, start, length);
+
serialized_sparse_column->insert_range_from(*src_object.serialized_sparse_column,
start,
+ length);
+
serialized_doc_value_column->insert_range_from(*src_object.serialized_doc_value_column,
+ start, length);
+ num_rows += length;
+ ENABLE_CHECK_CONSISTENCY(this);
+ return;
+ }
+
// First, insert src subcolumns
// We can reach the limit of subcolumns, and in this case
// the rest of subcolumns from src will be inserted into sparse column.
@@ -1288,7 +1334,7 @@ MutableColumnPtr ColumnVariant::permute(const
Permutation& perm, size_t limit) c
}
if (limit == 0) {
- return ColumnVariant::create(_max_subcolumns_count);
+ return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
}
return apply_for_columns([&](const ColumnPtr column) { return
column->permute(perm, limit); });
@@ -2144,13 +2190,14 @@ ColumnPtr ColumnVariant::filter(const Filter& filter,
ssize_t count) const {
return finalized_object.filter(filter, count);
}
if (num_rows == 0) {
- auto res = ColumnVariant::create(_max_subcolumns_count,
count_bytes_in_filter(filter));
+ auto res = ColumnVariant::create(_max_subcolumns_count,
_enable_doc_mode,
+ count_bytes_in_filter(filter));
ENABLE_CHECK_CONSISTENCY(res.get());
return res;
}
auto new_root = get_root()->filter(filter, count)->assume_mutable();
- auto new_column =
- ColumnVariant::create(_max_subcolumns_count, get_root_type(),
std::move(new_root));
+ auto new_column = ColumnVariant::create(_max_subcolumns_count,
_enable_doc_mode,
+ get_root_type(),
std::move(new_root));
for (const auto& entry : subcolumns) {
if (entry->data.is_root) {
continue;
@@ -2254,7 +2301,7 @@ bool NO_SANITIZE_UNDEFINED
ColumnVariant::is_scalar_variant() const {
const DataTypePtr ColumnVariant::NESTED_TYPE =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeArray>(
-
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(0))));
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(0,
false))));
const DataTypePtr ColumnVariant::NESTED_TYPE_AS_ARRAY_OF_JSONB =
std::make_shared<DataTypeArray>(
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeJsonb>()));
@@ -2634,7 +2681,7 @@ void
ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null
}
MutableColumnPtr ColumnVariant::clone() const {
- auto res = ColumnVariant::create(_max_subcolumns_count);
+ auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
Subcolumns new_subcolumns;
for (const auto& subcolumn : subcolumns) {
auto new_subcolumn = subcolumn->data;
diff --git a/be/src/core/column/column_variant.h
b/be/src/core/column/column_variant.h
index 2c3c427454e..9b7d84df525 100644
--- a/be/src/core/column/column_variant.h
+++ b/be/src/core/column/column_variant.h
@@ -286,6 +286,9 @@ private:
// if `_max_subcolumns_count == 0`, all subcolumns are materialized.
int32_t _max_subcolumns_count = 0;
+ // whether this column is configured for doc mode
+ bool _enable_doc_mode = false;
+
// subcolumns count materialized from typed paths
size_t typed_path_count = 0;
@@ -298,15 +301,16 @@ public:
private:
friend class COWHelper<IColumn, ColumnVariant>;
// always create root: data type nothing
- explicit ColumnVariant(int32_t max_subcolumns_count);
+ explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
// always create root: data type nothing
- explicit ColumnVariant(int32_t max_subcolumns_count, size_t size);
+ explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
size_t size);
- explicit ColumnVariant(int32_t max_subcolumns_count, DataTypePtr root_type,
- MutableColumnPtr&& root_column);
+ explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
+ DataTypePtr root_type, MutableColumnPtr&&
root_column);
- explicit ColumnVariant(int32_t max_subcolumns_count, Subcolumns&&
subcolumns_);
+ explicit ColumnVariant(int32_t max_subcolumns_count, bool enable_doc_mode,
+ Subcolumns&& subcolumns_);
public:
~ColumnVariant() override = default;
@@ -381,6 +385,8 @@ public:
int32_t max_subcolumns_count() const { return _max_subcolumns_count; }
+ bool enable_doc_mode() const { return _enable_doc_mode; }
+
/// Adds a subcolumn from existing IColumn.
bool add_sub_column(const PathInData& key, MutableColumnPtr&& subcolumn,
DataTypePtr type);
diff --git a/be/src/core/data_type/data_type_factory.cpp
b/be/src/core/data_type/data_type_factory.cpp
index a0f2fed6e99..1022449b0c9 100644
--- a/be/src/core/data_type/data_type_factory.cpp
+++ b/be/src/core/data_type/data_type_factory.cpp
@@ -104,7 +104,8 @@ DataTypePtr DataTypeFactory::create_data_type(const
TabletColumn& col_desc, bool
}
nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else if (col_desc.type() == FieldType::OLAP_FIELD_TYPE_VARIANT) {
- nested =
std::make_shared<DataTypeVariant>(col_desc.variant_max_subcolumns_count());
+ nested =
std::make_shared<DataTypeVariant>(col_desc.variant_max_subcolumns_count(),
+
col_desc.variant_enable_doc_mode());
} else {
nested = _create_primitive_data_type(col_desc.type(),
col_desc.precision(), col_desc.frac(),
col_desc.length());
@@ -175,7 +176,7 @@ DataTypePtr
DataTypeFactory::_create_primitive_data_type(const FieldType& type,
result = std::make_shared<DataTypeString>(-1, TYPE_STRING);
break;
case FieldType::OLAP_FIELD_TYPE_VARIANT:
- result = std::make_shared<DataTypeVariant>(0);
+ result = std::make_shared<DataTypeVariant>(0, false);
break;
case FieldType::OLAP_FIELD_TYPE_JSONB:
result = std::make_shared<DataTypeJsonb>();
@@ -242,7 +243,8 @@ DataTypePtr DataTypeFactory::create_data_type(const
PColumnMeta& pcolumn) {
nested = std::make_shared<DataTypeString>();
break;
case PGenericType::VARIANT:
- nested =
std::make_shared<DataTypeVariant>(pcolumn.variant_max_subcolumns_count());
+ nested =
std::make_shared<DataTypeVariant>(pcolumn.variant_max_subcolumns_count(),
+
pcolumn.variant_enable_doc_mode());
break;
case PGenericType::JSONB:
nested = std::make_shared<DataTypeJsonb>();
@@ -447,7 +449,7 @@ DataTypePtr DataTypeFactory::create_data_type(const
PrimitiveType primitive_type
nested = std::make_shared<DataTypeFloat64>();
break;
case TYPE_VARIANT:
- nested = std::make_shared<DataTypeVariant>(0);
+ nested = std::make_shared<DataTypeVariant>(0, false);
break;
case TYPE_STRING:
case TYPE_CHAR:
@@ -522,10 +524,12 @@ DataTypePtr DataTypeFactory::create_data_type(const
std::vector<TTypeNode>& type
if (scalar_type.type == TPrimitiveType::VARIANT) {
DCHECK(scalar_type.variant_max_subcolumns_count >= 0)
<< "count is: " <<
scalar_type.variant_max_subcolumns_count;
- return is_nullable ?
make_nullable(std::make_shared<DataTypeVariant>(
-
scalar_type.variant_max_subcolumns_count))
- : std::make_shared<DataTypeVariant>(
-
scalar_type.variant_max_subcolumns_count);
+ bool doc_mode = scalar_type.__isset.variant_enable_doc_mode
+ ? scalar_type.variant_enable_doc_mode
+ : false;
+ auto dt =
std::make_shared<DataTypeVariant>(scalar_type.variant_max_subcolumns_count,
+ doc_mode);
+ return is_nullable ? make_nullable(dt) : dt;
}
return create_data_type(thrift_to_type(scalar_type.type), is_nullable,
scalar_type.__isset.precision ?
scalar_type.precision : 0,
@@ -628,7 +632,8 @@ DataTypePtr DataTypeFactory::create_data_type(
// Do nothing
nested = std::make_shared<DataTypeAggState>();
} else if (primitive_type == TYPE_VARIANT) {
- nested =
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count());
+ nested =
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count(),
+
node.variant_enable_doc_mode());
} else {
return create_data_type(primitive_type, is_nullable,
scalar_type.has_precision() ?
scalar_type.precision() : 0,
@@ -669,7 +674,8 @@ DataTypePtr DataTypeFactory::create_data_type(
break;
}
case TTypeNodeType::VARIANT: {
- nested =
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count());
+ nested =
std::make_shared<DataTypeVariant>(node.variant_max_subcolumns_count(),
+
node.variant_enable_doc_mode());
break;
}
default:
diff --git a/be/src/core/data_type/data_type_variant.cpp
b/be/src/core/data_type/data_type_variant.cpp
index 2837b0532d8..2a27b3d87d7 100644
--- a/be/src/core/data_type/data_type_variant.cpp
+++ b/be/src/core/data_type/data_type_variant.cpp
@@ -46,19 +46,15 @@ class IColumn;
namespace doris {
#include "common/compile_check_begin.h"
-DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count)
- : _max_subcolumns_count(max_subcolumns_count) {
- name = fmt::format("Variant(max subcolumns count = {})",
max_subcolumns_count);
+DataTypeVariant::DataTypeVariant(int32_t max_subcolumns_count, bool
enable_doc_mode)
+ : _max_subcolumns_count(max_subcolumns_count),
_enable_doc_mode(enable_doc_mode) {
+ name = fmt::format("Variant(max subcolumns count = {}, enable doc mode =
{})",
+ max_subcolumns_count, enable_doc_mode);
}
bool DataTypeVariant::equals(const IDataType& rhs) const {
auto rhs_type = typeid_cast<const DataTypeVariant*>(&rhs);
- if (rhs_type && _max_subcolumns_count !=
rhs_type->variant_max_subcolumns_count()) {
- VLOG_DEBUG << "_max_subcolumns_count is" << _max_subcolumns_count
- << "rhs_type->variant_max_subcolumns_count()"
- << rhs_type->variant_max_subcolumns_count();
- return false;
- }
- return rhs_type && _max_subcolumns_count ==
rhs_type->variant_max_subcolumns_count();
+ return rhs_type && _max_subcolumns_count ==
rhs_type->variant_max_subcolumns_count() &&
+ _enable_doc_mode == rhs_type->enable_doc_mode();
}
int64_t DataTypeVariant::get_uncompressed_serialized_bytes(const IColumn&
column,
@@ -240,10 +236,11 @@ const char* DataTypeVariant::deserialize(const char* buf,
MutableColumnPtr* colu
void DataTypeVariant::to_pb_column_meta(PColumnMeta* col_meta) const {
IDataType::to_pb_column_meta(col_meta);
col_meta->set_variant_max_subcolumns_count(_max_subcolumns_count);
+ col_meta->set_variant_enable_doc_mode(_enable_doc_mode);
}
MutableColumnPtr DataTypeVariant::create_column() const {
- return ColumnVariant::create(_max_subcolumns_count);
+ return ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode);
}
} // namespace doris
\ No newline at end of file
diff --git a/be/src/core/data_type/data_type_variant.h
b/be/src/core/data_type/data_type_variant.h
index fd59733f452..d4353b9c858 100644
--- a/be/src/core/data_type/data_type_variant.h
+++ b/be/src/core/data_type/data_type_variant.h
@@ -48,13 +48,14 @@ namespace doris {
class DataTypeVariant : public IDataType {
private:
int32_t _max_subcolumns_count = 0;
+ bool _enable_doc_mode = false;
std::string name = "Variant";
public:
static constexpr PrimitiveType PType = TYPE_VARIANT;
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_VARIANT; }
DataTypeVariant() = default;
- DataTypeVariant(int32_t max_subcolumns_count);
+ DataTypeVariant(int32_t max_subcolumns_count, bool enable_doc_mode);
String do_get_name() const override { return name; }
const std::string get_family_name() const override { return "Variant"; }
@@ -81,8 +82,11 @@ public:
};
void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType*
scalar_type) const override {
node->set_type(TTypeNodeType::VARIANT);
+ node->set_variant_max_subcolumns_count(_max_subcolumns_count);
+ node->set_variant_enable_doc_mode(_enable_doc_mode);
}
void to_pb_column_meta(PColumnMeta* col_meta) const override;
int32_t variant_max_subcolumns_count() const { return
_max_subcolumns_count; }
+ bool enable_doc_mode() const { return _enable_doc_mode; }
};
} // namespace doris
diff --git a/be/src/exec/common/variant_util.cpp
b/be/src/exec/common/variant_util.cpp
index dd0c15a0025..73a31b95c7b 100644
--- a/be/src/exec/common/variant_util.cpp
+++ b/be/src/exec/common/variant_util.cpp
@@ -307,7 +307,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const
DataTypePtr& type, Co
CHECK(arg.column->is_nullable());
auto to_type = remove_nullable(type);
const auto& data_type_object = assert_cast<const
DataTypeVariant&>(*to_type);
- auto variant =
ColumnVariant::create(data_type_object.variant_max_subcolumns_count());
+ auto variant =
ColumnVariant::create(data_type_object.variant_max_subcolumns_count(),
+
data_type_object.enable_doc_mode());
variant->create_root(arg.type, arg.column->assume_mutable());
ColumnPtr nullable = ColumnNullable::create(
@@ -381,8 +382,9 @@ void get_column_by_type(const DataTypePtr& data_type, const
std::string& name, T
return;
}
if (data_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) {
- column.set_variant_max_subcolumns_count(assert_cast<const
DataTypeVariant*>(data_type.get())
-
->variant_max_subcolumns_count());
+ const auto* dt_variant = assert_cast<const
DataTypeVariant*>(data_type.get());
+
column.set_variant_max_subcolumns_count(dt_variant->variant_max_subcolumns_count());
+ column.set_variant_enable_doc_mode(dt_variant->enable_doc_mode());
return;
}
// size is not fixed when type is string or json
@@ -1145,6 +1147,7 @@ void
VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
subcolumn.set_aggregation_method(parent_column->aggregation());
subcolumn.set_variant_max_subcolumns_count(
parent_column->variant_max_subcolumns_count());
+
subcolumn.set_variant_enable_doc_mode(parent_column->variant_enable_doc_mode());
subcolumn.set_is_nullable(true);
output_schema->append_column(subcolumn);
VLOG_DEBUG << "append sub column " << subpath << " data type "
@@ -1253,6 +1256,7 @@ Status
VariantCompactionUtil::get_extended_compaction_schema(
TabletColumn doc_value_bucket_column =
create_doc_value_column(*column, b);
doc_value_bucket_column.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
doc_value_bucket_column.set_is_nullable(false);
+ doc_value_bucket_column.set_variant_enable_doc_mode(true);
output_schema->append_column(doc_value_bucket_column);
}
continue;
@@ -1937,6 +1941,7 @@ void parse_json_to_variant_impl(IColumn& column, const
char* src, size_t length,
}
break;
case ParseConfig::ParseTo::OnlyDocValueColumn: {
+ CHECK(column_variant.enable_doc_mode()) << "OnlyDocValueColumn
requires doc mode enabled";
std::vector<size_t> doc_item_indexes;
doc_item_indexes.reserve(paths.size());
phmap::flat_hash_set<StringRef, StringRefHash> seen_paths;
@@ -2130,7 +2135,7 @@ Status _parse_and_materialize_variant_columns(Block&
block,
}
if (scalar_root_column->is_column_string()) {
- variant_column = ColumnVariant::create(0);
+ variant_column = ColumnVariant::create(0, var.enable_doc_mode());
parse_json_to_variant(*variant_column.get(),
assert_cast<const
ColumnString&>(*scalar_root_column),
configs[i]);
diff --git a/be/src/exec/scan/olap_scanner.cpp
b/be/src/exec/scan/olap_scanner.cpp
index ef98bc553dd..e11bd1d7717 100644
--- a/be/src/exec/scan/olap_scanner.cpp
+++ b/be/src/exec/scan/olap_scanner.cpp
@@ -554,11 +554,12 @@ Status OlapScanner::_init_variant_columns() {
if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT)
{
// Such columns are not exist in frontend schema info, so we need
to
// add them into tablet_schema for later column indexing.
+ const auto& dt_variant =
+ assert_cast<const
DataTypeVariant&>(*remove_nullable(slot->type()));
TabletColumn subcol =
TabletColumn::create_materialized_variant_column(
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(),
slot->column_paths(), slot->col_unique_id(),
- assert_cast<const
DataTypeVariant&>(*remove_nullable(slot->type()))
- .variant_max_subcolumns_count());
+ dt_variant.variant_max_subcolumns_count(),
dt_variant.enable_doc_mode());
if (tablet_schema->field_index(*subcol.path_info_ptr()) < 0) {
tablet_schema->append_column(subcol,
TabletSchema::ColumnType::VARIANT);
}
diff --git a/be/src/exprs/function/array/function_array_utils.cpp
b/be/src/exprs/function/array/function_array_utils.cpp
index 4ce3cc4a737..29776153d17 100644
--- a/be/src/exprs/function/array/function_array_utils.cpp
+++ b/be/src/exprs/function/array/function_array_utils.cpp
@@ -58,7 +58,7 @@ bool extract_column_array_info(const IColumn& src,
ColumnArrayExecutionData& dat
if (data.output_as_variant &&
data.nested_type->get_primitive_type() != PrimitiveType::TYPE_VARIANT)
{
// set variant root column/type to from column/type
- auto variant = ColumnVariant::create(true /*always nullable*/);
+ auto variant = ColumnVariant::create(0, data.variant_enable_doc_mode);
auto nullable_nested_type = make_nullable(data.nested_type);
auto nullable_col = make_nullable(data.nested_col);
variant->create_root(nullable_nested_type,
std::move(*nullable_col).mutate());
diff --git a/be/src/exprs/function/array/function_array_utils.h
b/be/src/exprs/function/array/function_array_utils.h
index 8b57b4b0e9f..f13791c8cdf 100644
--- a/be/src/exprs/function/array/function_array_utils.h
+++ b/be/src/exprs/function/array/function_array_utils.h
@@ -57,6 +57,8 @@ public:
DataTypePtr nested_type = nullptr;
// wrap the nested column as variant column
bool output_as_variant = false;
+ // propagate enable_doc_mode when wrapping as variant
+ bool variant_enable_doc_mode = false;
ColumnArrayMutableData to_mutable_data() const {
ColumnArrayMutableData dst;
diff --git a/be/src/exprs/function/cast/cast_to_variant.h
b/be/src/exprs/function/cast/cast_to_variant.h
index 45c0c012f95..69b2322c679 100644
--- a/be/src/exprs/function/cast/cast_to_variant.h
+++ b/be/src/exprs/function/cast/cast_to_variant.h
@@ -134,7 +134,12 @@ struct CastToVariant {
const auto& from_type = col_with_type_and_name.type;
const auto& col_from = col_with_type_and_name.column;
// set variant root column/type to from column/type
- auto variant = ColumnVariant::create(true /*always nullable*/);
+ const auto& data_type_to = block.get_by_position(result).type;
+ const auto* variant_type =
+ typeid_cast<const
DataTypeVariant*>(remove_nullable(data_type_to).get());
+ auto variant = ColumnVariant::create(
+ variant_type ? variant_type->variant_max_subcolumns_count() :
0,
+ variant_type ? variant_type->enable_doc_mode() : false);
variant->create_root(from_type, col_from->assume_mutable());
block.replace_by_position(result, std::move(variant));
return Status::OK();
diff --git a/be/src/exprs/function/function_variant_element.cpp
b/be/src/exprs/function/function_variant_element.cpp
index ba84d7f1a61..dc0717a9d74 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -77,7 +77,8 @@ public:
auto arg_variant = remove_nullable(arguments[0]);
const auto& data_type_object = assert_cast<const
DataTypeVariant&>(*arg_variant);
return make_nullable(
-
std::make_shared<DataTypeVariant>(data_type_object.variant_max_subcolumns_count()));
+
std::make_shared<DataTypeVariant>(data_type_object.variant_max_subcolumns_count(),
+
data_type_object.enable_doc_mode()));
}
// wrap variant column with nullable
@@ -199,7 +200,6 @@ private:
const PathInData& path,
ColumnVariant::MutablePtr& target_ptr) {
ColumnVariant::Subcolumn root {0, true, true};
- // no root, no sparse column
const auto& doc_value_data_map =
assert_cast<const
ColumnMap&>(*src_ptr->get_doc_value_column());
const auto& src_doc_value_data_offsets =
doc_value_data_map.get_offsets();
@@ -207,11 +207,13 @@ private:
assert_cast<const
ColumnString&>(doc_value_data_map.get_keys());
const auto& src_doc_value_data_values =
assert_cast<const
ColumnString&>(doc_value_data_map.get_values());
- auto& sparse_data_offsets =
-
assert_cast<ColumnMap&>(*target_ptr->get_sparse_column()->assume_mutable())
+ // Write extracted data into target's doc_value column (not sparse) to
preserve
+ // doc mode invariant: doc_mode columns must not have sparse data.
+ auto& doc_value_offsets =
+
assert_cast<ColumnMap&>(*target_ptr->get_doc_value_column()->assume_mutable())
.get_offsets();
- auto [sparse_data_paths, sparse_data_values] =
- target_ptr->get_sparse_data_paths_and_values();
+ auto [doc_value_paths, doc_value_values] =
+ target_ptr->get_doc_value_data_paths_and_values();
StringRef prefix_ref(path.get_path());
std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
for (size_t i = 0; i != src_doc_value_data_offsets.size(); ++i) {
@@ -225,33 +227,26 @@ private:
if (!nested_path.starts_with(path_prefix)) {
break;
}
- // Don't include path that is equal to the prefix.
if (nested_path.size() != path_prefix.size()) {
auto sub_path_optional = get_sub_path(nested_path,
path_prefix);
if (!sub_path_optional.has_value()) {
continue;
}
std::string_view sub_path = *sub_path_optional;
- sparse_data_paths->insert_data(sub_path.data(),
sub_path.size());
- sparse_data_values->insert_from(src_doc_value_data_values,
lower_bound_index);
+ doc_value_paths->insert_data(sub_path.data(),
sub_path.size());
+ doc_value_values->insert_from(src_doc_value_data_values,
lower_bound_index);
} else {
- // insert into root column, example: access v['b'] and b
is in sparse column
- // data example:
- // {"b" : 123}
- // {"b" : {"c" : 456}}
- // b maybe in sparse column, and b.c is in subolumn, put
`b` into root column to distinguish
- // from "" which is empty path and root
root.deserialize_from_binary_column(&src_doc_value_data_values,
lower_bound_index);
}
}
- if (root.size() == sparse_data_offsets.size()) {
+ if (root.size() == doc_value_offsets.size()) {
root.insert_default();
}
- sparse_data_offsets.push_back(sparse_data_paths->size());
+ doc_value_offsets.push_back(doc_value_paths->size());
}
target_ptr->get_subcolumns().create_root(root);
-
target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size());
+
target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size());
target_ptr->set_num_rows(src_ptr->size());
}
@@ -259,7 +254,7 @@ private:
ColumnPtr* result) {
std::string field_name = index_column->get_data_at(0).to_string();
if (src.empty()) {
- *result = ColumnVariant::create(src.max_subcolumns_count());
+ *result = ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode());
// src subcolumns empty but src row count may not be 0
(*result)->assume_mutable()->insert_many_defaults(src.size());
// ColumnVariant should be finalized before parsing, finalize
maybe modify original column structure
@@ -286,7 +281,7 @@ private:
result_column->insert_default();
}
}
- *result = ColumnVariant::create(src.max_subcolumns_count(), type,
+ *result = ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode(), type,
std::move(result_column));
(*result)->assume_mutable()->finalize();
return Status::OK();
@@ -296,7 +291,8 @@ private:
PathInData path(field_name);
ColumnVariant::Subcolumns subcolumns =
mutable_ptr->get_subcolumns();
const auto* node = subcolumns.find_exact(path);
- MutableColumnPtr result_col =
ColumnVariant::create(src.max_subcolumns_count());
+ MutableColumnPtr result_col =
+ ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode());
ColumnVariant::Subcolumns new_subcolumns;
if (node != nullptr) {
@@ -321,19 +317,22 @@ private:
new_subcolumns.create_root(ColumnVariant::Subcolumn {
nodes[0]->data.get_finalized_column_ptr()->assume_mutable(),
nodes[0]->data.get_least_common_type(), true,
true});
- auto container =
ColumnVariant::create(src.max_subcolumns_count(),
-
std::move(new_subcolumns));
+ auto container =
+ ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode(),
+ std::move(new_subcolumns));
result_col->insert_range_from(*container, 0,
container->size());
} else {
- auto container =
ColumnVariant::create(src.max_subcolumns_count(),
-
std::move(new_subcolumns));
+ auto container =
+ ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode(),
+ std::move(new_subcolumns));
container->clear_sparse_column();
_extract_sparse_column_from_source(mutable_ptr, path,
container);
result_col->insert_range_from(*container, 0,
container->size());
}
} else {
- auto container =
ColumnVariant::create(src.max_subcolumns_count(),
-
std::move(new_subcolumns));
+ auto container =
+ ColumnVariant::create(src.max_subcolumns_count(),
src.enable_doc_mode(),
+ std::move(new_subcolumns));
const auto& sparse_offsets =
mutable_ptr->serialized_sparse_column_offsets();
if (sparse_offsets.back() == sparse_offsets[-1]) {
_extract_doc_value_column_from_source(mutable_ptr, path,
container);
diff --git a/be/src/exprs/table_function/vexplode.cpp
b/be/src/exprs/table_function/vexplode.cpp
index 8a0b961bb33..ae52cb9175b 100644
--- a/be/src/exprs/table_function/vexplode.cpp
+++ b/be/src/exprs/table_function/vexplode.cpp
@@ -49,6 +49,7 @@ Status VExplodeTableFunction::_process_init_variant(Block*
block, int value_colu
auto& variant_column =
assert_cast<ColumnVariant&>(*(column->assume_mutable()));
variant_column.finalize();
_detail.output_as_variant = true;
+ _detail.variant_enable_doc_mode = variant_column.enable_doc_mode();
if (!variant_column.is_null_root()) {
_array_column = variant_column.get_root();
// We need to wrap the output nested column within a variant column.
diff --git a/be/src/exprs/table_function/vexplode_v2.cpp
b/be/src/exprs/table_function/vexplode_v2.cpp
index 2894453948a..964bc8f2d6e 100644
--- a/be/src/exprs/table_function/vexplode_v2.cpp
+++ b/be/src/exprs/table_function/vexplode_v2.cpp
@@ -56,6 +56,7 @@ Status VExplodeV2TableFunction::_process_init_variant(Block*
block, int value_co
auto& variant_column =
assert_cast<ColumnVariant&>(*(column->assume_mutable()));
variant_column.finalize();
_multi_detail[children_column_idx].output_as_variant = true;
+ _multi_detail[children_column_idx].variant_enable_doc_mode =
variant_column.enable_doc_mode();
if (!variant_column.is_null_root()) {
_array_columns[children_column_idx] = variant_column.get_root();
// We need to wrap the output nested column within a variant column.
diff --git a/be/src/storage/segment/segment.cpp
b/be/src/storage/segment/segment.cpp
index 8f768ae4f89..79f2bc8a219 100644
--- a/be/src/storage/segment/segment.cpp
+++ b/be/src/storage/segment/segment.cpp
@@ -946,11 +946,12 @@ Status Segment::seek_and_read_by_rowid(const
TabletSchema& schema, SlotDescripto
// if segment cache miss, column reader will be created to make sure
the variant column result not coredump
RETURN_IF_ERROR(_create_column_meta_once(storage_read_options.stats));
+ const auto& dt_variant =
+ assert_cast<const
DataTypeVariant&>(*remove_nullable(slot->type()));
TabletColumn column = TabletColumn::create_materialized_variant_column(
schema.column_by_uid(slot->col_unique_id()).name_lower_case(),
slot->column_paths(),
- slot->col_unique_id(),
- assert_cast<const
DataTypeVariant&>(*remove_nullable(slot->type()))
- .variant_max_subcolumns_count());
+ slot->col_unique_id(),
dt_variant.variant_max_subcolumns_count(),
+ dt_variant.enable_doc_mode());
auto storage_type = get_data_type_of(column, storage_read_options);
MutableColumnPtr file_storage_column = storage_type->create_column();
DCHECK(storage_type != nullptr);
diff --git a/be/src/storage/segment/segment_writer.cpp
b/be/src/storage/segment/segment_writer.cpp
index 3f5fae5792c..deef80e8e20 100644
--- a/be/src/storage/segment/segment_writer.cpp
+++ b/be/src/storage/segment/segment_writer.cpp
@@ -171,6 +171,7 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta,
uint32_t column_id,
meta->set_be_exec_version(column.get_be_exec_version());
if (column.is_variant_type()) {
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+ meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
}
}
diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
index 8c219b2f982..e19804899a5 100644
--- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
+++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp
@@ -226,7 +226,7 @@ Status HierarchicalDataIterator::_process_nested_columns(
const auto* base_array =
check_and_get_column<ColumnArray>(*remove_nullable(entry.second[0].column));
MutableColumnPtr nested_object =
- ColumnVariant::create(0 /*no sparse column*/,
base_array->get_data().size());
+ ColumnVariant::create(0, false, base_array->get_data().size());
MutableColumnPtr offset =
base_array->get_offsets_ptr()->assume_mutable();
auto* nested_object_ptr =
assert_cast<ColumnVariant*>(nested_object.get());
// flatten nested arrays
@@ -271,7 +271,8 @@ Status HierarchicalDataIterator::_process_nested_columns(
}
Status HierarchicalDataIterator::_init_container(MutableColumnPtr& container,
size_t nrows,
- int32_t max_subcolumns_count)
{
+ int32_t max_subcolumns_count,
+ bool enable_doc_mode) {
// build variant as container
// add root first
if (_path.get_parts().empty() && _root_reader) {
@@ -290,12 +291,13 @@ Status
HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si
auto nullable_column = make_nullable(column->get_ptr());
auto type = make_nullable(_root_reader->type);
// make sure the root type is nullable
- container = ColumnVariant::create(max_subcolumns_count, type,
+ container = ColumnVariant::create(max_subcolumns_count,
enable_doc_mode, type,
nullable_column->assume_mutable());
} else {
DataTypePtr root_type = std::make_shared<DataTypeNothing>();
auto column = ColumnNothing::create(nrows);
- container = ColumnVariant::create(max_subcolumns_count, root_type,
std::move(column));
+ container = ColumnVariant::create(max_subcolumns_count,
enable_doc_mode, root_type,
+ std::move(column));
}
auto& container_variant = assert_cast<ColumnVariant&>(*container);
@@ -334,7 +336,7 @@ Status
HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si
RETURN_IF_ERROR(_process_nested_columns(container_variant,
nested_subcolumns, nrows));
{
SCOPED_RAW_TIMER(&_stats->variant_fill_path_from_sparse_column_timer_ns);
- RETURN_IF_ERROR(_process_sparse_column(container_variant, nrows));
+ RETURN_IF_ERROR(_process_binary_column(container_variant, nrows));
}
container_variant.set_num_rows(nrows);
@@ -352,7 +354,7 @@ static std::optional<std::string_view> get_sub_path(const
std::string_view& path
return path.substr(prefix.size() + 1);
}
-Status HierarchicalDataIterator::_process_sparse_column(ColumnVariant&
container_variant,
+Status HierarchicalDataIterator::_process_binary_column(ColumnVariant&
container_variant,
size_t nrows) {
container_variant.clear_sparse_column();
// process sparse column
@@ -368,6 +370,58 @@ Status
HierarchicalDataIterator::_process_sparse_column(ColumnVariant& container
}
ENABLE_CHECK_CONSISTENCY(&container_variant);
return Status::OK();
+ } else if (_read_type == ReadType::DOC_VALUE_COLUMN) {
+ // Doc mode hierarchical read: extract sub-paths matching prefix from
source
+ // doc_value and write them (with prefix stripped) into container's
doc_value.
+ // No subcolumn materialization — preserves doc-mode invariant.
+ const auto& src_map = assert_cast<const
ColumnMap&>(*_binary_column_reader->column);
+ const auto& src_offsets = src_map.get_offsets();
+ const auto& src_paths = assert_cast<const
ColumnString&>(src_map.get_keys());
+ const auto& src_values = assert_cast<const
ColumnString&>(src_map.get_values());
+
+ // Clear pre-initialized doc_value offsets (created by ColumnVariant
ctor with num_rows)
+ container_variant.get_doc_value_column()->assume_mutable()->clear();
+ auto [dst_paths, dst_values] =
container_variant.get_doc_value_data_paths_and_values();
+ auto& dst_offsets =
container_variant.serialized_doc_value_column_offsets();
+
+ StringRef prefix_ref(_path.get_path());
+ std::string_view path_prefix(prefix_ref.data, prefix_ref.size);
+
+ for (size_t i = 0; i != src_offsets.size(); ++i) {
+ size_t start = src_offsets[ssize_t(i) - 1];
+ size_t end = src_offsets[ssize_t(i)];
+ size_t lower_bound_index =
ColumnVariant::find_path_lower_bound_in_sparse_data(
+ prefix_ref, src_paths, start, end);
+ for (; lower_bound_index != end; ++lower_bound_index) {
+ auto path_ref = src_paths.get_data_at(lower_bound_index);
+ std::string_view path(path_ref.data, path_ref.size);
+ if (!path.starts_with(path_prefix)) {
+ break;
+ }
+ if (path.size() == path_prefix.size()) {
+ // Exact match (e.g. querying v['obj'] and path is 'obj')
→ root value
+ if (container_variant.is_null_root()) {
+
container_variant.get_subcolumn({})->resize(dst_offsets.size());
+ }
+
container_variant.get_subcolumn({})->deserialize_from_binary_column(
+ &src_values, lower_bound_index);
+ continue;
+ }
+ auto sub_path_optional = get_sub_path(path, path_prefix);
+ if (!sub_path_optional.has_value()) {
+ continue;
+ }
+ std::string_view sub_path = *sub_path_optional;
+ dst_paths->insert_data(sub_path.data(), sub_path.size());
+ dst_values->insert_from(src_values, lower_bound_index);
+ }
+ if (!container_variant.is_null_root() &&
+ container_variant.get_subcolumn({})->size() ==
dst_offsets.size()) {
+ container_variant.get_subcolumn({})->insert_default();
+ }
+ dst_offsets.push_back(dst_paths->size());
+ }
+ container_variant.get_sparse_column()->assume_mutable()->resize(nrows);
} else {
const auto& offsets =
assert_cast<const
ColumnMap&>(*_binary_column_reader->column).get_offsets();
diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.h
b/be/src/storage/segment/variant/hierarchical_data_iterator.h
index eb5e29093e3..8c0e3366b1e 100644
--- a/be/src/storage/segment/variant/hierarchical_data_iterator.h
+++ b/be/src/storage/segment/variant/hierarchical_data_iterator.h
@@ -122,14 +122,15 @@ private:
ColumnVariant& container_variant,
const std::map<PathInData, PathsWithColumnAndType>&
nested_subcolumns, size_t nrows);
- Status _process_sparse_column(ColumnVariant& container_variant, size_t
nrows);
+ Status _process_binary_column(ColumnVariant& container_variant, size_t
nrows);
// 1. add root column
// 2. collect path for subcolumns and nested subcolumns
// 3. init container with subcolumns
// 4. init container with nested subcolumns
// 5. init container with sparse column
- Status _init_container(MutableColumnPtr& container, size_t nrows, int
max_subcolumns_count);
+ Status _init_container(MutableColumnPtr& container, size_t nrows, int
max_subcolumns_count,
+ bool enable_doc_mode);
// clear all subcolumns's column data for next batch read
// set null map for nullable column
@@ -170,7 +171,8 @@ private:
}
MutableColumnPtr container;
- RETURN_IF_ERROR(_init_container(container, nrows,
variant.max_subcolumns_count()));
+ RETURN_IF_ERROR(_init_container(container, nrows,
variant.max_subcolumns_count(),
+ variant.enable_doc_mode()));
auto& container_variant = assert_cast<ColumnVariant&>(*container);
variant.insert_range_from(container_variant, 0, nrows);
diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp
b/be/src/storage/segment/variant/variant_column_reader.cpp
index 93f896c80b5..9d06b6ebb84 100644
--- a/be/src/storage/segment/variant/variant_column_reader.cpp
+++ b/be/src/storage/segment/variant/variant_column_reader.cpp
@@ -309,8 +309,10 @@ Result<BinaryColumnCacheSPtr>
VariantColumnReader::_get_binary_column_cache(
DataTypePtr create_variant_type(const TabletColumn& target_col) {
return target_col.is_nullable()
? make_nullable(std::make_shared<DataTypeVariant>(
- target_col.variant_max_subcolumns_count()))
- :
std::make_shared<DataTypeVariant>(target_col.variant_max_subcolumns_count());
+ target_col.variant_max_subcolumns_count(),
+ target_col.variant_enable_doc_mode()))
+ :
std::make_shared<DataTypeVariant>(target_col.variant_max_subcolumns_count(),
+
target_col.variant_enable_doc_mode());
}
Status VariantColumnReader::_build_read_plan_flat_leaves(
@@ -782,8 +784,14 @@ Status VariantColumnReader::_build_read_plan(ReadPlan*
plan, const TabletColumn&
if (_has_prefix_path_unlocked(relative_path)) {
// Example {"b" : {"c":456,"e":7.111}}
// b.c is sparse column, b.e is subcolumn, so b is both the prefix of
sparse column and
- // subcolumn
- plan->kind = ReadKind::HIERARCHICAL;
+ // subcolumn.
+ // Doc mode: prefer extracting hierarchy from doc_value column to
preserve doc mode
+ // invariant (root-only + doc_value). Non-doc mode: read from
subcolumns + sparse.
+ if (target_col.variant_enable_doc_mode()) {
+ plan->kind = ReadKind::HIERARCHICAL_DOC;
+ } else {
+ plan->kind = ReadKind::HIERARCHICAL;
+ }
plan->type = create_variant_type(target_col);
plan->relative_path = relative_path;
plan->node = node;
@@ -1417,7 +1425,7 @@ Status
VariantRootColumnIterator::_process_root_column(MutableColumnPtr& dst,
}
// add root column to a tmp object column
- auto tmp = ColumnVariant::create(0, root_column->size());
+ auto tmp = ColumnVariant::create(0, obj.enable_doc_mode(),
root_column->size());
auto& tmp_obj = assert_cast<ColumnVariant&>(*tmp);
tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
//
tmp_obj.get_sparse_column()->assume_mutable()->insert_many_defaults(root_column->size());
diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
index 3017881a4fb..4f87a1640b3 100644
--- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
+++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
@@ -84,6 +84,7 @@ void _init_column_meta(ColumnMetaPB* meta, uint32_t
column_id, const TabletColum
}
if (column.is_variant_type()) {
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+ meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
}
}
@@ -1167,7 +1168,7 @@ Status VariantColumnWriterImpl::init() {
if (_opts.rowset_ctx->write_type == DataWriteType::TYPE_DIRECT) {
count = 0;
}
- _column = ColumnVariant::create(count);
+ _column = ColumnVariant::create(count,
_tablet_column->variant_enable_doc_mode());
return Status::OK();
}
@@ -1589,7 +1590,7 @@ VariantSubcolumnWriter::VariantSubcolumnWriter(const
ColumnWriterOptions& opts,
: ColumnWriter(std::move(field), opts.meta->is_nullable(), opts.meta) {
_tablet_column = column;
_opts = opts;
- _column = ColumnVariant::create(0);
+ _column = ColumnVariant::create(0, false);
}
Status VariantSubcolumnWriter::init() {
@@ -1715,7 +1716,7 @@ VariantDocCompactWriter::VariantDocCompactWriter(const
ColumnWriterOptions& opts
: ColumnWriter(std::move(field), opts.meta->is_nullable(), opts.meta) {
_opts = opts;
_tablet_column = column;
- _column = ColumnVariant::create(0);
+ _column = ColumnVariant::create(0, false);
}
Status VariantDocCompactWriter::init() {
diff --git
a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
index acc90eea121..7f2d63a0457 100644
--- a/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
+++ b/be/src/storage/segment/variant/variant_doc_snpashot_compact_iterator.h
@@ -55,7 +55,8 @@ private:
Status _set_doc_value_into_variant(MutableColumnPtr& dst,
MutableColumnPtr&& doc_value_column,
size_t count) const {
auto& variant = assert_cast<ColumnVariant&>(*dst);
- MutableColumnPtr container =
ColumnVariant::create(variant.max_subcolumns_count(), count);
+ MutableColumnPtr container =
ColumnVariant::create(variant.max_subcolumns_count(),
+
variant.enable_doc_mode(), count);
auto& container_variant = assert_cast<ColumnVariant&>(*container);
container_variant.set_doc_value_column(std::move(doc_value_column));
variant.insert_range_from(container_variant, 0, count);
diff --git
a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
index d6a2817b6d8..0b993ac8a88 100644
--- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
+++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp
@@ -109,7 +109,7 @@ Status
VariantStreamingCompactionWriter::_append_input_from_raw(const uint8_t**
Status VariantStreamingCompactionWriter::_append_input(const ColumnVariant&
src, size_t row_pos,
size_t num_rows,
const uint8_t*
outer_null_map) {
- auto chunk_variant = ColumnVariant::create(0);
+ auto chunk_variant = ColumnVariant::create(0, src.enable_doc_mode());
chunk_variant->insert_range_from(src, row_pos, num_rows);
RETURN_IF_ERROR(chunk_variant->sanitize());
chunk_variant->finalize();
diff --git a/be/src/storage/segment/vertical_segment_writer.cpp
b/be/src/storage/segment/vertical_segment_writer.cpp
index 215d069425f..a3728111fa2 100644
--- a/be/src/storage/segment/vertical_segment_writer.cpp
+++ b/be/src/storage/segment/vertical_segment_writer.cpp
@@ -174,6 +174,7 @@ void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB*
meta, uint32_t colum
}
if (column.is_variant_type()) {
meta->set_variant_max_subcolumns_count(column.variant_max_subcolumns_count());
+ meta->set_variant_enable_doc_mode(column.variant_enable_doc_mode());
}
meta->set_result_is_nullable(column.get_result_is_nullable());
meta->set_function_name(column.get_aggregation_name());
diff --git a/be/src/storage/tablet/tablet_meta.cpp
b/be/src/storage/tablet/tablet_meta.cpp
index 6d00beb8d2b..05e01fec6d8 100644
--- a/be/src/storage/tablet/tablet_meta.cpp
+++ b/be/src/storage/tablet/tablet_meta.cpp
@@ -561,8 +561,8 @@ void TabletMeta::init_column_from_tcolumn(uint32_t
unique_id, const TColumn& tco
if (tcolumn.__isset.variant_sparse_hash_shard_count) {
column->set_variant_sparse_hash_shard_count(tcolumn.variant_sparse_hash_shard_count);
}
- if (tcolumn.__isset.variant_enable_doc_mode) {
- column->set_variant_enable_doc_mode(tcolumn.variant_enable_doc_mode);
+ if (tcolumn.column_type.__isset.variant_enable_doc_mode) {
+
column->set_variant_enable_doc_mode(tcolumn.column_type.variant_enable_doc_mode);
}
if (tcolumn.__isset.variant_doc_materialization_min_rows) {
column->set_variant_doc_materialization_min_rows(
diff --git a/be/src/storage/tablet/tablet_schema.cpp
b/be/src/storage/tablet/tablet_schema.cpp
index 432552de37e..9caa121bcc3 100644
--- a/be/src/storage/tablet/tablet_schema.cpp
+++ b/be/src/storage/tablet/tablet_schema.cpp
@@ -689,7 +689,8 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
TabletColumn TabletColumn::create_materialized_variant_column(const
std::string& root,
const
std::vector<std::string>& paths,
int32_t
parent_unique_id,
- int32_t
max_subcolumns_count) {
+ int32_t
max_subcolumns_count,
+ bool
enable_doc_mode) {
TabletColumn subcol;
subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
subcol.set_is_nullable(true);
@@ -699,6 +700,7 @@ TabletColumn
TabletColumn::create_materialized_variant_column(const std::string&
subcol.set_path_info(path);
subcol.set_name(path.get_path());
subcol.set_variant_max_subcolumns_count(max_subcolumns_count);
+ subcol.set_variant_enable_doc_mode(enable_doc_mode);
return subcol;
}
diff --git a/be/src/storage/tablet/tablet_schema.h
b/be/src/storage/tablet/tablet_schema.h
index f5acd96f65a..7564872fdd2 100644
--- a/be/src/storage/tablet/tablet_schema.h
+++ b/be/src/storage/tablet/tablet_schema.h
@@ -132,7 +132,8 @@ public:
static TabletColumn create_materialized_variant_column(const std::string&
root,
const
std::vector<std::string>& paths,
int32_t
parent_unique_id,
- int32_t
max_subcolumns_count);
+ int32_t
max_subcolumns_count,
+ bool
enable_doc_mode = false);
bool has_default_value() const { return _has_default_value; }
std::string default_value() const { return _default_value; }
int32_t length() const { return _length; }
diff --git a/be/test/core/column/column_variant_test.cpp
b/be/test/core/column/column_variant_test.cpp
index 03ae171b242..8c9549a7dae 100644
--- a/be/test/core/column/column_variant_test.cpp
+++ b/be/test/core/column/column_variant_test.cpp
@@ -65,7 +65,7 @@ protected:
test_data_dir = root_dir + "/be/test/data/vec/columns";
test_result_dir = root_dir + "/be/test/expected_result/vec/columns";
- column_variant = ColumnVariant::create(true);
+ column_variant = ColumnVariant::create(0, false);
std::cout << dt_variant->get_name() << std::endl;
load_json_columns_data();
@@ -116,7 +116,7 @@ protected:
template <typename T>
void column_common_test(T callback) {
- callback(ColumnVariant(true), column_variant->get_ptr());
+ callback(ColumnVariant(0, false), column_variant->get_ptr());
}
void hash_common_test(
@@ -765,7 +765,7 @@ TEST_F(ColumnVariantTest, empty_inset_range_from) {
EXPECT_EQ(src->size(), 6);
// dst is an empty column
- auto dst = ColumnVariant::create(5);
+ auto dst = ColumnVariant::create(5, false);
// subcolumn->subcolumn v.a v.b v.c v.f v.e
dst->insert_range_from(*src, 0, 6);
@@ -951,7 +951,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
// Test case 1: Insert from scalar variant source to empty destination
{
// Create source column with scalar values
- auto src_column = ColumnVariant::create(true);
+ auto src_column = ColumnVariant::create(0, false);
VariantUtil::insert_root_scalar_field(*src_column,
Field::create_field<TYPE_INT>(123));
VariantUtil::insert_root_scalar_field(*src_column,
Field::create_field<TYPE_INT>(456));
src_column->finalize();
@@ -960,7 +960,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
EXPECT_EQ(src_column->size(), 2);
// Create empty destination column
- auto dst_column = ColumnVariant::create(true);
+ auto dst_column = ColumnVariant::create(0, false);
EXPECT_EQ(dst_column->size(), 0);
// Create indices
@@ -991,14 +991,14 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
// Test case 2: Insert from scalar variant source to non-empty destination
of same type
{
// Create source column with scalar values
- auto src_column = ColumnVariant::create(true);
+ auto src_column = ColumnVariant::create(0, false);
VariantUtil::insert_root_scalar_field(*src_column,
Field::create_field<TYPE_INT>(123));
VariantUtil::insert_root_scalar_field(*src_column,
Field::create_field<TYPE_INT>(456));
src_column->finalize();
EXPECT_TRUE(src_column->is_scalar_variant());
// Create destination column with same type
- auto dst_column = ColumnVariant::create(true);
+ auto dst_column = ColumnVariant::create(0, false);
VariantUtil::insert_root_scalar_field(*dst_column,
Field::create_field<TYPE_INT>(789));
dst_column->finalize();
EXPECT_TRUE(dst_column->is_scalar_variant());
@@ -1027,7 +1027,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
// Test case 3: Insert from non-scalar or different type source (fallback
to try_insert)
{
// Create source column with object values (non-scalar)
- auto src_column = ColumnVariant::create(true);
+ auto src_column = ColumnVariant::create(0, false);
// Create a map with {"a": 123}
Field field_map = Field::create_field<TYPE_VARIANT>(VariantMap());
@@ -1050,7 +1050,7 @@ TEST_F(ColumnVariantTest, test_insert_indices_from) {
EXPECT_FALSE(src_column->is_scalar_variant());
// Create destination column (empty)
- auto dst_column = ColumnVariant::create(true);
+ auto dst_column = ColumnVariant::create(0, false);
// Create indices
std::vector<uint32_t> indices = {1, 0};
@@ -1173,7 +1173,7 @@ TEST_F(ColumnVariantTest, field_test) {
}
};
ColumnVariant::MutablePtr obj;
- obj = ColumnVariant::create(1);
+ obj = ColumnVariant::create(1, false);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json +
"json_variant/object_boundary.jsonl";
@@ -1214,7 +1214,7 @@ TEST_F(ColumnVariantTest, serialize_one_row_to_string) {
{
// TEST SCALA_VARAINT
// 1. create an empty variant column
- auto v = ColumnVariant::create(true);
+ auto v = ColumnVariant::create(0, false);
auto dt =
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_STRING,
0,
0);
auto cs = dt->create_column();
@@ -1663,7 +1663,7 @@ TEST_F(ColumnVariantTest, get_subcolumn) {
TEST_F(ColumnVariantTest, ensure_root_node_type) {
ColumnVariant::MutablePtr obj;
- obj = ColumnVariant::create(1);
+ obj = ColumnVariant::create(1, false);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json +
"json_variant/object_boundary.jsonl";
@@ -2124,7 +2124,7 @@ TEST_F(ColumnVariantTest,
find_path_lower_bound_in_sparse_data) {
}
};
ColumnVariant::MutablePtr obj;
- obj = ColumnVariant::create(1);
+ obj = ColumnVariant::create(1, false);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json +
"json_variant/object_boundary.jsonl";
@@ -2137,7 +2137,7 @@ TEST_F(ColumnVariantTest,
find_path_lower_bound_in_sparse_data) {
// used in BinaryColumnExtractIterator::_fill_path_column
TEST_F(ColumnVariantTest, fill_path_column_from_sparse_data) {
ColumnVariant::MutablePtr obj;
- obj = ColumnVariant::create(1);
+ obj = ColumnVariant::create(1, false);
MutableColumns cols;
cols.push_back(obj->get_ptr());
const auto& json_file_obj = test_data_dir_json +
"json_variant/object_boundary.jsonl";
@@ -2164,7 +2164,7 @@ TEST_F(ColumnVariantTest,
fill_path_column_from_sparse_data) {
TEST_F(ColumnVariantTest, not_finalized) {
ColumnVariant::MutablePtr obj;
- obj = ColumnVariant::create(1);
+ obj = ColumnVariant::create(1, false);
MutableColumns cols;
cols.push_back(obj->get_ptr());
@@ -2331,7 +2331,7 @@ TEST_F(ColumnVariantTest, array_field_operations) {
{
// Test wrapp_array_nullable
// 1. create an empty variant column
- auto variant = ColumnVariant::create(2);
+ auto variant = ColumnVariant::create(2, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -2376,12 +2376,12 @@ TEST_F(ColumnVariantTest, assert_exception_happen) {
dynamic_subcolumns.add(PathInData("v.b.d"), ColumnVariant::Subcolumn
{0, true});
dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn
{0, true});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size()
<< std::endl;
- EXPECT_ANY_THROW(ColumnVariant::create(2,
std::move(dynamic_subcolumns)));
+ EXPECT_ANY_THROW(ColumnVariant::create(2, false,
std::move(dynamic_subcolumns)));
}
{
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -2467,7 +2467,7 @@ TEST_F(ColumnVariantTest, try_insert_default_from_nested)
{
ColumnVariant::Subcolumn {std::move(column),
array_type, false, false});
dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn {0,
true});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() <<
std::endl;
- auto obj = ColumnVariant::create(5, std::move(dynamic_subcolumns));
+ auto obj = ColumnVariant::create(5, false, std::move(dynamic_subcolumns));
for (auto& entry : obj->get_subcolumns()) {
std::cout << "entry path: " << entry->path.get_path() << std::endl;
@@ -2504,7 +2504,7 @@ TEST_F(ColumnVariantTest, unnest) {
ColumnVariant::Subcolumn {std::move(nested_col),
ColumnVariant::NESTED_TYPE, true, false});
std::cout << "dynamic_subcolumns size: " << dynamic_subcolumns.size() <<
std::endl;
- auto obj = ColumnVariant::create(2, std::move(dynamic_subcolumns));
+ auto obj = ColumnVariant::create(2, false, std::move(dynamic_subcolumns));
obj->set_num_rows(2);
EXPECT_TRUE(!obj->empty());
std::cout << obj->size() << std::endl;
@@ -2513,7 +2513,7 @@ TEST_F(ColumnVariantTest, unnest) {
TEST_F(ColumnVariantTest, path_in_data_builder_test) {
// Create a ColumnVariant with nested subcolumns
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
// Test case 1: Build a nested path with PathInDataBuilder
{
@@ -3171,7 +3171,7 @@ TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
col_arr->insert(an);
col_arr->insert(an);
MutableColumnPtr nested_object = ColumnVariant::create(
- container_variant.max_subcolumns_count(),
col_arr->get_data().size());
+ container_variant.max_subcolumns_count(), false,
col_arr->get_data().size());
MutableColumnPtr offset =
col_arr->get_offsets_ptr()->assume_mutable(); // [3, 3, 4]
auto* nested_object_ptr =
assert_cast<ColumnVariant*>(nested_object.get());
// flatten nested arrays
@@ -3203,9 +3203,9 @@ TEST_F(ColumnVariantTest, subcolumn_operations_coverage) {
// Test is_empty_nested
{
- auto v = ColumnVariant::create(1);
+ auto v = ColumnVariant::create(1, false);
auto sub_dt = make_nullable(std::make_unique<DataTypeArray>(
- make_nullable(std::make_unique<DataTypeVariant>(1))));
+ make_nullable(std::make_unique<DataTypeVariant>(1, false))));
auto sub_col = sub_dt->create_column();
std::vector<std::pair<std::string, doris::Field>> data;
@@ -3468,7 +3468,7 @@ TEST_F(ColumnVariantTest,
subcolumn_insert_range_from_test_advanced) {
}
TEST_F(ColumnVariantTest, test_variant_no_data_insert) {
- auto variant = ColumnVariant::create(1);
+ auto variant = ColumnVariant::create(1, false);
variant->insert_many_defaults(10);
EXPECT_EQ(variant->size(), 10);
EXPECT_TRUE(variant->only_have_default_values());
diff --git a/be/test/exec/common/schema_util_rowset_test.cpp
b/be/test/exec/common/schema_util_rowset_test.cpp
index aa9c2fd13cd..cf99c982495 100644
--- a/be/test/exec/common/schema_util_rowset_test.cpp
+++ b/be/test/exec/common/schema_util_rowset_test.cpp
@@ -706,7 +706,7 @@ TEST_F(SchemaUtilRowsetTest,
some_test_for_subcolumn_writer) {
auto size = variant_subcolumn_writer->estimate_buffer_size();
std::cout << "size: " << size << std::endl;
// append data
- auto insert_object = ColumnVariant::create(true);
+ auto insert_object = ColumnVariant::create(0, false);
fill_varaint_column(insert_object, 1, 1);
std::cout << insert_object->debug_string() << std::endl;
std::unique_ptr<VariantColumnData> _variant_column_data =
std::make_unique<VariantColumnData>();
diff --git a/be/test/exec/common/schema_util_test.cpp
b/be/test/exec/common/schema_util_test.cpp
index 1a9b4617c52..db13e17159d 100644
--- a/be/test/exec/common/schema_util_test.cpp
+++ b/be/test/exec/common/schema_util_test.cpp
@@ -707,8 +707,8 @@ TEST_F(SchemaUtilTest, TestParseVariantColumns) {
Block block;
// Create a variant column with JSON string data
- auto variant_type = std::make_shared<DataTypeVariant>(10);
- auto variant_column = ColumnVariant::create(10);
+ auto variant_type = std::make_shared<DataTypeVariant>(10, false);
+ auto variant_column = ColumnVariant::create(10, false);
auto root_column = ColumnString::create();
root_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'a':
1, 'b': 'test'}"));
variant_column->create_root(std::make_shared<DataTypeString>(),
root_column->get_ptr());
@@ -773,7 +773,7 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) {
EXPECT_EQ(result->size(), 1);
// Test casting to variant type
- auto variant_type = std::make_shared<DataTypeVariant>(10);
+ auto variant_type = std::make_shared<DataTypeVariant>(10, false);
auto nullable_array_type =
make_nullable(std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>()));
auto array_column =
@@ -801,7 +801,7 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) {
EXPECT_TRUE(result1->is_nullable());
// Test casting from variant to variant
- auto variant_column = ColumnVariant::create(10);
+ auto variant_column = ColumnVariant::create(10, false);
variant_column->create_root(nullable_array_type,
nullable_array_column->assume_mutable());
ColumnWithTypeAndName variant_col;
@@ -1215,8 +1215,8 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
Block block;
// Test parsing from string to variant
- auto variant_type = std::make_shared<DataTypeVariant>(10);
- auto variant_column = ColumnVariant::create(10);
+ auto variant_type = std::make_shared<DataTypeVariant>(10, false);
+ auto variant_column = ColumnVariant::create(10, false);
auto root_column = ColumnString::create();
// Add some test JSON data
@@ -1239,7 +1239,7 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
auto jsonb_column = ColumnString::create();
jsonb_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'x':
1}"));
- auto variant_column2 = ColumnVariant::create(10);
+ auto variant_column2 = ColumnVariant::create(10, false);
variant_column2->create_root(jsonb_type, jsonb_column->get_ptr());
Block block2;
@@ -1249,7 +1249,7 @@ TEST_F(SchemaUtilTest, TestParseVariantColumnsEdgeCases) {
EXPECT_TRUE(status.ok());
// Test parsing already parsed variant
- auto variant_column3 = ColumnVariant::create(10);
+ auto variant_column3 = ColumnVariant::create(10, false);
variant_column3->finalize();
Block block3;
@@ -1263,14 +1263,14 @@ TEST_F(SchemaUtilTest,
TestParseVariantColumnsWithNulls) {
Block block;
// Create a nullable variant column
- auto variant_type = make_nullable(std::make_shared<DataTypeVariant>(10));
+ auto variant_type = make_nullable(std::make_shared<DataTypeVariant>(10,
false));
auto string_type = make_nullable(std::make_shared<DataTypeString>());
auto string_column = ColumnString::create();
string_column->insert(Field::create_field<PrimitiveType::TYPE_STRING>("{'a':
1}"));
auto nullable_string = make_nullable(string_column->get_ptr());
- auto variant_column = ColumnVariant::create(10);
+ auto variant_column = ColumnVariant::create(10, false);
variant_column->create_root(string_type,
nullable_string->assume_mutable());
auto nullable_variant = make_nullable(variant_column->get_ptr());
@@ -1866,7 +1866,7 @@ TEST_F(SchemaUtilTest,
parse_and_materialize_variant_columns_ambiguous_paths) {
dynamic_subcolumns.create_root(
ColumnVariant::Subcolumn(string_col->assume_mutable(),
string_type, true));
- auto variant_col = ColumnVariant::create(0, std::move(dynamic_subcolumns));
+ auto variant_col = ColumnVariant::create(0, false,
std::move(dynamic_subcolumns));
auto variant_type = std::make_shared<DataTypeVariant>();
// Construct the block
diff --git a/be/test/exprs/function/cast/function_variant_cast_test.cpp
b/be/test/exprs/function/cast/function_variant_cast_test.cpp
index 2f97e2e85eb..2ee76058bc6 100644
--- a/be/test/exprs/function/cast/function_variant_cast_test.cpp
+++ b/be/test/exprs/function/cast/function_variant_cast_test.cpp
@@ -49,7 +49,7 @@ static doris::Field construct_variant_map(
static auto construct_basic_varint_column() {
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -171,7 +171,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
{
auto variant_type = std::make_shared<DataTypeVariant>();
auto int32_type = std::make_shared<DataTypeInt32>();
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
// Create a variant column with integer values
variant_col->create_root(int32_type, ColumnInt32::create());
@@ -210,7 +210,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
{
auto variant_type = std::make_shared<DataTypeVariant>();
auto string_type = std::make_shared<DataTypeString>();
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
// Create a variant column with string values
variant_col->create_root(string_type, ColumnString::create());
@@ -246,7 +246,7 @@ TEST(FunctionVariantCast, CastFromVariant) {
{
auto variant_type = std::make_shared<DataTypeVariant>();
auto array_type =
std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt32>());
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
// Create a variant column with array values
variant_col->create_root(
@@ -294,7 +294,7 @@ TEST(FunctionVariantCast, CastVariantWithNull) {
auto nullable_int32_type = std::make_shared<DataTypeNullable>(int32_type);
// Create a variant column with nullable integer values
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
variant_col->create_root(nullable_int32_type,
ColumnNullable::create(ColumnInt32::create(),
ColumnUInt8::create()));
MutableColumnPtr data = variant_col->get_root();
@@ -343,7 +343,7 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
ColumnVariant::Subcolumns dynamic_subcolumns;
dynamic_subcolumns.add(PathInData(ColumnVariant::COLUMN_NAME_DUMMY),
ColumnVariant::Subcolumn {root->get_ptr(),
int32_type, true, true});
- auto variant_col = ColumnVariant::create(0,
std::move(dynamic_subcolumns));
+ auto variant_col = ColumnVariant::create(0, false,
std::move(dynamic_subcolumns));
variant_col->finalize();
ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
@@ -481,7 +481,7 @@ TEST(FunctionVariantCast,
CastFromVariantStrictModeRegression) {
auto nullable_int32_type =
std::make_shared<DataTypeNullable>(int32_type);
// Create variant column with nullable integer root (some null, some not)
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
variant_col->create_root(
nullable_int32_type,
ColumnNullable::create(ColumnInt32::create(),
ColumnUInt8::create()));
@@ -557,7 +557,7 @@ TEST(FunctionVariantCast,
CastFromVariantStrictModeRegression) {
auto nullable_string_type =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
- auto variant_col = ColumnVariant::create(0);
+ auto variant_col = ColumnVariant::create(0, false);
variant_col->create_root(
nullable_string_type,
ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create()));
diff --git a/be/test/exprs/function/function_variant_element_test.cpp
b/be/test/exprs/function/function_variant_element_test.cpp
index 76ac61a98ba..d4d413a601a 100644
--- a/be/test/exprs/function/function_variant_element_test.cpp
+++ b/be/test/exprs/function/function_variant_element_test.cpp
@@ -22,7 +22,7 @@
namespace doris {
TEST(function_variant_element_test, extract_from_sparse_column) {
- auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/);
+ auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/,
false);
auto* variant_ptr = assert_cast<ColumnVariant*>(variant_column.get());
ColumnVariant::Subcolumn subcolumn(0, true, false);
diff --git a/be/test/storage/segment/hierarchical_data_iterator_test.cpp
b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
index 107cbd0b1ce..614e73dff6a 100644
--- a/be/test/storage/segment/hierarchical_data_iterator_test.cpp
+++ b/be/test/storage/segment/hierarchical_data_iterator_test.cpp
@@ -96,10 +96,10 @@ TEST(HierarchicalDataIteratorTest,
ProcessSparseExtractSubpaths) {
offs.push_back(keys.size());
const size_t nrows = 2;
- MutableColumnPtr dst = ColumnVariant::create(/*max_subcolumns_count*/ 2,
nrows);
+ MutableColumnPtr dst = ColumnVariant::create(/*max_subcolumns_count*/ 2,
false, nrows);
auto& variant = assert_cast<ColumnVariant&>(*dst);
- ASSERT_TRUE(hiter->_process_sparse_column(variant, nrows).ok());
+ ASSERT_TRUE(hiter->_process_binary_column(variant, nrows).ok());
// root column + 2 subcolumns
EXPECT_EQ(variant.get_subcolumns().size(), 3);
diff --git a/be/test/storage/segment/nested_group_provider_test.cpp
b/be/test/storage/segment/nested_group_provider_test.cpp
index f59ac52cfaa..7baa6f51fcb 100644
--- a/be/test/storage/segment/nested_group_provider_test.cpp
+++ b/be/test/storage/segment/nested_group_provider_test.cpp
@@ -58,7 +58,7 @@ TEST(NestedGroupProviderTest, DefaultWriteProviderIsNoOp) {
GTEST_SKIP() << "EE build: write provider has real implementation";
}
- auto column_variant = ColumnVariant::create(0);
+ auto column_variant = ColumnVariant::create(0, false);
ColumnWriterOptions opts;
VariantStatistics statistics;
diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp
b/be/test/storage/segment/variant_column_writer_reader_test.cpp
index 2fa6642e340..e309aa6f2b1 100644
--- a/be/test/storage/segment/variant_column_writer_reader_test.cpp
+++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp
@@ -230,8 +230,8 @@ protected:
for (const auto& batch : batches) {
Block block = _tablet_schema->create_block();
auto columns = block.mutate_columns();
- auto variant_col =
-
ColumnVariant::create(_tablet_schema->column(0).variant_max_subcolumns_count());
+ auto variant_col = ColumnVariant::create(
+ _tablet_schema->column(0).variant_max_subcolumns_count(),
false);
auto json_col = ColumnString::create();
for (const auto& json : batch) {
json_col->insert_data(json.data(), json.size());
@@ -271,8 +271,8 @@ protected:
Block block = _tablet_schema->create_block();
auto columns = block.mutate_columns();
- auto variant_col =
-
ColumnVariant::create(_tablet_schema->column(0).variant_max_subcolumns_count());
+ auto variant_col = ColumnVariant::create(
+ _tablet_schema->column(0).variant_max_subcolumns_count(),
false);
auto json_col = ColumnString::create();
for (const auto& json : jsons) {
json_col->insert_data(json.data(), json.size());
@@ -432,7 +432,7 @@ static std::set<std::string> collect_regular_paths(
static std::vector<std::string> normalize_json_rows(const
std::vector<std::string>& jsons,
int
variant_max_subcolumns_count) {
- auto variant_col = ColumnVariant::create(variant_max_subcolumns_count);
+ auto variant_col = ColumnVariant::create(variant_max_subcolumns_count,
false);
auto json_col = ColumnString::create();
for (const auto& json : jsons) {
json_col->insert_data(json.data(), json.size());
@@ -685,7 +685,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_data_normal) {
}
auto read_to_column_object = [&](ColumnIteratorUPtr& it) {
- new_column_object = ColumnVariant::create(3);
+ new_column_object = ColumnVariant::create(3, false);
nrows = 1000;
st = it->seek_to_ordinal(0);
EXPECT_TRUE(st.ok()) << st.msg();
@@ -872,7 +872,8 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_data_normal) {
// test VariantRootColumnIterator for next_batch and read_by_rowids
{
auto iter = assert_cast<VariantRootColumnIterator*>(it3.get());
- auto nullable_dt =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(3));
+ auto nullable_dt =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeVariant>(3, false));
MutableColumnPtr root_column_object = nullable_dt->create_column();
nrows = 1000;
st = iter->seek_to_ordinal(0);
@@ -981,19 +982,19 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_data_normal) {
for (int i = 0; i < 1000; ++i) {
row_ids1.push_back(i);
}
- MutableColumnPtr sparse_dst1 = ColumnVariant::create(3);
+ MutableColumnPtr sparse_dst1 = ColumnVariant::create(3, false);
st = iter->read_by_rowids(row_ids1.data(), row_ids1.size(),
sparse_dst1);
EXPECT_TRUE(st.ok()) << st.msg();
EXPECT_TRUE(sparse_dst1->size() == row_ids1.size());
// test to nullable column object
std::cout << "test 2 " << std::endl;
MutableColumnPtr sparse_dst2 =
- ColumnNullable::create(ColumnVariant::create(3),
ColumnUInt8::create());
+ ColumnNullable::create(ColumnVariant::create(3, false),
ColumnUInt8::create());
st = iter->read_by_rowids(row_ids1.data(), row_ids1.size(),
sparse_dst2);
EXPECT_TRUE(st.ok()) << st.msg();
EXPECT_TRUE(sparse_dst2->size() == row_ids1.size());
std::cout << "test 3" << std::endl;
- MutableColumnPtr sparse_dst3 = ColumnVariant::create(3);
+ MutableColumnPtr sparse_dst3 = ColumnVariant::create(3, false);
size_t rs = 1000;
bool has_null = false;
st = iter->seek_to_ordinal(0);
@@ -1528,11 +1529,11 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_doc_compact_writer_and_read_doc
config.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
MutableColumnPtr root_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
variant_util::parse_json_to_variant(*root_variant, *full_strings, config);
MutableColumnPtr bucket_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
variant_util::parse_json_to_variant(*bucket_variant, *bucket_strings,
config);
// 6. append and write
@@ -1703,7 +1704,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_doc_compact_sparse_write_array_gap) {
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
MutableColumnPtr bucket_variant =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
variant_util::parse_json_to_variant(*bucket_variant, *strings, parse_cfg);
auto bucket_data = std::make_unique<VariantColumnData>();
@@ -1804,7 +1805,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_doc_sparse_write_array_gap_and_
parse_cfg.parse_to = ParseConfig::ParseTo::OnlyDocValueColumn;
MutableColumnPtr variant_column =
-
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), false);
+
ColumnVariant::create(parent_column.variant_max_subcolumns_count(), true);
variant_util::parse_json_to_variant(*variant_column, *strings, parse_cfg);
auto variant_data = std::make_unique<VariantColumnData>();
@@ -2012,7 +2013,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_data_advanced) {
st = it->init(column_iter_opts);
EXPECT_TRUE(st.ok()) << st.msg();
- MutableColumnPtr new_column_object = ColumnVariant::create(3);
+ MutableColumnPtr new_column_object = ColumnVariant::create(3, false);
size_t nrows = 1000;
st = it->seek_to_ordinal(0);
EXPECT_TRUE(st.ok()) << st.msg();
@@ -2031,7 +2032,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_write_data_advanced) {
}
auto read_to_column_object = [&](ColumnIteratorUPtr& it) {
- new_column_object = ColumnVariant::create(10);
+ new_column_object = ColumnVariant::create(10, false);
nrows = 1000;
st = it->seek_to_ordinal(0);
EXPECT_TRUE(st.ok()) << st.msg();
@@ -3301,7 +3302,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
st = nested_iter->init(column_iter_opts);
EXPECT_TRUE(st.ok()) << st.msg();
// fill with nullable ColumnVariant target
- MutableColumnPtr new_column_object1 = ColumnVariant::create(3);
+ MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false);
MutableColumnPtr null_object =
ColumnNullable::create(new_column_object1->assume_mutable(),
ColumnUInt8::create());
size_t n = 1000;
@@ -3313,7 +3314,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
EXPECT_TRUE(stats.bytes_read > 0);
{
// fill with nullable ColumnVariant target
- MutableColumnPtr new_column_object12 = ColumnVariant::create(3);
+ MutableColumnPtr new_column_object12 = ColumnVariant::create(3, false);
MutableColumnPtr null_object12 = ColumnNullable::create(
new_column_object12->assume_mutable(), ColumnUInt8::create());
st = nested_iter->seek_to_ordinal(0);
@@ -3345,7 +3346,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
st = nested_iter2->init(column_iter_opts);
EXPECT_TRUE(st.ok()) << st.msg();
// fill with nullable ColumnVariant target
- MutableColumnPtr new_column_object2 = ColumnVariant::create(3);
+ MutableColumnPtr new_column_object2 = ColumnVariant::create(3, false);
MutableColumnPtr null_object2 =
ColumnNullable::create(new_column_object2->assume_mutable(),
ColumnUInt8::create());
size_t nrows = 1000;
@@ -3393,7 +3394,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) {
nested_subcolumns[parent_path].emplace_back(second);
// test _process_with_nested_column with different type
// init container which is ColumnVariant
- MutableColumnPtr nested_column_object = ColumnVariant::create(3);
+ MutableColumnPtr nested_column_object = ColumnVariant::create(3,
false);
auto& container_variant =
assert_cast<ColumnVariant&>(*nested_column_object);
st = nested_iter2->_process_nested_columns(container_variant,
nested_subcolumns, n);
std::cout << st.msg() << std::endl;
@@ -3455,7 +3456,7 @@ TEST_F(VariantColumnWriterReaderTest,
test_nested_iter_nullable) {
st = nested_iter->init(column_iter_opts);
EXPECT_TRUE(st.ok()) << st.msg();
// fill with nullable ColumnVariant target
- MutableColumnPtr new_column_object1 = ColumnVariant::create(3);
+ MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false);
MutableColumnPtr null_object =
ColumnNullable::create(new_column_object1->assume_mutable(),
ColumnUInt8::create());
size_t nrows = 1000;
diff --git a/be/test/storage/segment/variant_util_test.cpp
b/be/test/storage/segment/variant_util_test.cpp
index 53e9ede840c..9fb72343eba 100644
--- a/be/test/storage/segment/variant_util_test.cpp
+++ b/be/test/storage/segment/variant_util_test.cpp
@@ -49,7 +49,7 @@ TEST(VariantUtilTest,
ParseDocValueToSubcolumns_FillsDefaultsAndValues) {
R"({"a":3})", //
};
- auto variant = ColumnVariant::create(0);
+ auto variant = ColumnVariant::create(0, true);
auto json_col = _make_json_column(jsons);
ParseConfig cfg;
@@ -101,7 +101,7 @@ TEST(VariantUtilTest,
ParseOnlyDocValueColumn_SerializesMixedTypes) {
R"({"b":false,"arr":[4],"s":"y"})",
};
- auto variant = ColumnVariant::create(0);
+ auto variant = ColumnVariant::create(0, true);
auto json_col = _make_json_column(jsons);
ParseConfig cfg;
@@ -185,14 +185,14 @@ TEST(VariantUtilTest,
ParseVariantColumns_ScalarJsonStringToSubcolumns) {
TabletSchema tablet_schema;
tablet_schema.init_from_pb(schema_pb);
- auto variant = ColumnVariant::create(0);
+ auto variant = ColumnVariant::create(0, false);
doris::VariantUtil::insert_root_scalar_field(
*variant, Field::create_field<TYPE_STRING>(String(R"({"a":1})")));
doris::VariantUtil::insert_root_scalar_field(
*variant, Field::create_field<TYPE_STRING>(String(R"({"a":2})")));
Block block;
- block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0),
"v"});
+ block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0,
false), "v"});
const std::vector<uint32_t> column_pos {0};
Status st = parse_and_materialize_variant_columns(block, tablet_schema,
column_pos);
@@ -219,7 +219,7 @@ TEST(VariantUtilTest,
ParseVariantColumns_DocModeBinaryToSubcolumns) {
};
// Build a doc-mode ColumnVariant: Only root in subcolumns, others stored in doc snapshot column.
- auto variant = ColumnVariant::create(0);
+ auto variant = ColumnVariant::create(0, true);
auto json_col = _make_json_column(jsons);
ParseConfig cfg;
cfg.deprecated_enable_flatten_nested = false;
@@ -228,7 +228,7 @@ TEST(VariantUtilTest,
ParseVariantColumns_DocModeBinaryToSubcolumns) {
ASSERT_TRUE(variant->is_doc_mode());
Block block;
- block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0),
"v"});
+ block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0,
true), "v"});
ParseConfig parse_cfg;
parse_cfg.deprecated_enable_flatten_nested = false;
@@ -271,7 +271,7 @@ TEST(VariantUtilTest,
ParseVariantColumns_DocModeBinaryToSubcolumns) {
TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
const std::vector<std::string_view> jsons = {R"({"a":1})"};
- auto variant = ColumnVariant::create(0);
+ auto variant = ColumnVariant::create(0, true);
auto json_col = _make_json_column(jsons);
ParseConfig cfg;
@@ -281,7 +281,7 @@ TEST(VariantUtilTest,
ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
ASSERT_TRUE(variant->is_doc_mode());
Block block;
- block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0),
"v"});
+ block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0,
true), "v"});
ParseConfig parse_cfg;
parse_cfg.deprecated_enable_flatten_nested = false;
diff --git a/be/test/testutil/variant_util.h b/be/test/testutil/variant_util.h
index 9d3cdea9e84..17959afcbd6 100644
--- a/be/test/testutil/variant_util.h
+++ b/be/test/testutil/variant_util.h
@@ -70,7 +70,7 @@ public:
static auto construct_basic_varint_column() {
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -105,12 +105,12 @@ public:
dynamic_subcolumns.add(PathInData("v.b"), ColumnVariant::Subcolumn {0,
true});
dynamic_subcolumns.add(PathInData("v.b.d"), ColumnVariant::Subcolumn
{0, true});
dynamic_subcolumns.add(PathInData("v.c.d"), ColumnVariant::Subcolumn
{0, true});
- return ColumnVariant::create(5, std::move(dynamic_subcolumns));
+ return ColumnVariant::create(5, false, std::move(dynamic_subcolumns));
}
static auto construct_advanced_varint_column() {
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -153,7 +153,7 @@ public:
static auto construct_varint_column_only_subcolumns() {
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
@@ -196,7 +196,7 @@ public:
static auto construct_varint_column_more_subcolumns() {
// 1. create an empty variant column
- auto variant = ColumnVariant::create(5);
+ auto variant = ColumnVariant::create(5, false);
std::vector<std::pair<std::string, doris::Field>> data;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 94a46864eb9..d5d6e1559de 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -602,6 +602,7 @@ public class Column implements GsonPostProcessable {
tColumnType.setPrecision(this.getPrecision());
tColumnType.setScale(this.getScale());
tColumnType.setVariantMaxSubcolumnsCount(this.getVariantMaxSubcolumnsCount());
+ tColumnType.setVariantEnableDocMode(this.getVariantEnableDocMode());
tColumnType.setIndexLen(this.getOlapColumnIndexSize());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index a3b75f40903..3810e30b195 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -396,13 +396,15 @@ public abstract class ExternalFileTableValuedFunction
extends TableValuedFunctio
} else if (tPrimitiveType == TPrimitiveType.VARIANT) {
// Preserve VARIANT-specific properties from PTypeNode, especially variant_max_subcolumns_count.
int maxSubcolumns = typeNode.getVariantMaxSubcolumnsCount();
+ boolean enableDocMode = typeNode.hasVariantEnableDocMode()
+ ? typeNode.getVariantEnableDocMode() : false;
// Currently no predefined fields are carried in PTypeNode for VARIANT, so use empty list and default
// values for other properties.
type = new VariantType(new ArrayList<>(), maxSubcolumns,
/*enableTypedPathsToSparse*/ false,
/*variantMaxSparseColumnStatisticsSize*/ 10000,
/*variantSparseHashShardCount*/ 0,
- /*variantEnableDocMode*/ false,
+ /*variantEnableDocMode*/ enableDocMode,
/*variantDocMaterializationMinRows*/ 0,
/*variantDocShardCount*/ 0,
/*enableNestedGroup*/ false);
diff --git a/fe/fe-type/src/main/java/org/apache/doris/catalog/VariantType.java
b/fe/fe-type/src/main/java/org/apache/doris/catalog/VariantType.java
index 69917dbf4b7..df24768c180 100644
--- a/fe/fe-type/src/main/java/org/apache/doris/catalog/VariantType.java
+++ b/fe/fe-type/src/main/java/org/apache/doris/catalog/VariantType.java
@@ -200,6 +200,8 @@ public class VariantType extends ScalarType {
// set the count
container.getTypes().get(container.getTypes().size() - 1)
.scalar_type.setVariantMaxSubcolumnsCount(variantMaxSubcolumnsCount);
+ container.getTypes().get(container.getTypes().size() - 1)
+ .scalar_type.setVariantEnableDocMode(enableVariantDocMode);
}
@Override
diff --git a/gensrc/proto/data.proto b/gensrc/proto/data.proto
index bda5daa03bf..9cf8e37d6d4 100644
--- a/gensrc/proto/data.proto
+++ b/gensrc/proto/data.proto
@@ -68,6 +68,7 @@ message PColumnMeta {
optional int32 be_exec_version = 8;
optional segment_v2.ColumnPathInfo column_path = 9;
optional int32 variant_max_subcolumns_count = 10 [default = 0];
+ optional bool variant_enable_doc_mode = 11 [default = false];
}
message PBlock {
diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto
index 74c5479fd0f..3c8e646acd9 100644
--- a/gensrc/proto/segment_v2.proto
+++ b/gensrc/proto/segment_v2.proto
@@ -230,6 +230,7 @@ message ColumnMetaPB {
optional uint64 compressed_data_bytes = 24;
optional uint64 uncompressed_data_bytes = 25;
optional uint64 raw_data_bytes = 26;
+ optional bool variant_enable_doc_mode = 27 [default = false];
}
// External column meta entry describing one top-level column's externalized
diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto
index 49bff7c4049..d0947b7a2af 100644
--- a/gensrc/proto/types.proto
+++ b/gensrc/proto/types.proto
@@ -56,6 +56,7 @@ message PTypeNode {
// only used for VARIANT
optional int32 variant_max_subcolumns_count = 6 [default = 0];
+ optional bool variant_enable_doc_mode = 7 [default = false];
};
// A flattened representation of a tree of column types obtained by depth-first
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index 08063e06610..7a2f6a185c4 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -96,7 +96,7 @@ struct TColumn {
23: optional bool is_on_update_current_timestamp = false
24: optional i32 variant_max_sparse_column_statistics_size = 10000
25: optional i32 variant_sparse_hash_shard_count
- 26: optional bool variant_enable_doc_mode
+ 26: optional bool variant_enable_doc_mode // deprecated, use TColumnType.variant_enable_doc_mode
27: optional i64 variant_doc_materialization_min_rows
28: optional i32 variant_doc_hash_shard_count
29: optional bool variant_enable_nested_group
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index c8429a0fe2d..66ad6fd3f54 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -145,6 +145,7 @@ struct TScalarType {
// Only set for VARIANT
5: optional i32 variant_max_subcolumns_count = 0;
+ 6: optional bool variant_enable_doc_mode = false;
}
// Represents a field in a STRUCT type.
@@ -284,6 +285,7 @@ struct TColumnType {
4: optional i32 precision
5: optional i32 scale
6: optional i32 variant_max_subcolumns_count = 0;
+ 7: optional bool variant_enable_doc_mode = false;
}
// A TNetworkAddress is the standard host, port representation of a
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]