github-actions[bot] commented on code in PR #26220:
URL: https://github.com/apache/doris/pull/26220#discussion_r1388284536


##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }
+    MutableColumnPtr clone_dummy(size_t s_) const override { return 
ColumnNothing::create(s_); }
+
+    bool structure_equals(const IColumn& rhs) const override {

Review Comment:
   warning: function 'structure_equals' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool structure_equals(const IColumn& rhs) const override {
   ```
   



##########
be/src/vec/columns/column_nullable.h:
##########
@@ -102,6 +102,7 @@ class ColumnNullable final : public COWHelper<IColumn, 
ColumnNullable> {
     bool is_null_at(size_t n) const override {
         return assert_cast<const ColumnUInt8&>(*null_map).get_data()[n] != 0;
     }
+    bool is_default_at(size_t n) const override { return is_null_at(n); }

Review Comment:
   warning: function 'is_default_at' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool is_default_at(size_t n) const override { return 
is_null_at(n); }
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // Non-leaf nodes need to be merged with the root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        stream_iter->add_stream(leaves[i]);
+    }
+    // Make sure the root node is in stream_cache, so that a child can merge its data with root.
+    // E.g. {"a" : {"b" : {"c" : 1}}}: access the `a.b` path and merge with the root path so that
+    // the data can be fully merged, since some columns may not be extracted but remain in root,
+    // like {"a" : {"b" : {"e" : 1.1}}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
+                                              vectorized::MutableColumnPtr& 
dst) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, 
reader.column));
+                VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, 
type={}", path.get_path(),
+                                          count, type->get_name());
+                reader.rows_read += count;
+                return Status::OK();
+            },
+            dst, count);
+}
+
+Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* 
node) {
+    if (_substream_reader.find_leaf(node->path)) {
+        VLOG_DEBUG << "Already exist sub column " << node->path.get_path();
+        return Status::OK();
+    }
+    CHECK(node);
+    ColumnIterator* it;
+    RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
+    std::unique_ptr<ColumnIterator> it_ptr;
+    it_ptr.reset(it);
+    StreamReader reader(node->data.file_column_type->create_column(), 
std::move(it_ptr),
+                        node->data.file_column_type);
+    bool added = _substream_reader.add(node->path, std::move(reader));
+    if (!added) {
+        return Status::InternalError("Failed to add node path {}", 
node->path.get_path());
+    }
+    VLOG_DEBUG << fmt::format("Add substream {} for {}", 
node->path.get_path(), _path.get_path());
+    return Status::OK();
+}
+
+ordinal_t HierarchicalDataReader::get_current_ordinal() const {
+    return (*_substream_reader.begin())->data.iterator->get_current_ordinal();
+}
+
+Status ExtractReader::init(const ColumnIteratorOptions& opts) {
+    if (!_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status ExtractReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status ExtractReader::seek_to_ordinal(ordinal_t ord) {
+    CHECK(_root_reader->inited);
+    return _root_reader->iterator->seek_to_ordinal(ord);
+}
+
+Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t 
nrows) {
+    DCHECK(_root_reader);
+    DCHECK(_root_reader->inited);
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    if (dst->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    }
+    auto& variant =
+            nullable_column == nullptr
+                    ? assert_cast<vectorized::ColumnObject&>(*dst)
+                    : 
assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column());
+    const auto& root =
+            _root_reader->column->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                                      .get_nested_column())
+                    : assert_cast<const 
vectorized::ColumnObject&>(*_root_reader->column);
+    // extract root value with path, we can't modify the original root column
+    // since some other column may depend on it.
+    vectorized::MutableColumnPtr extracted_column;

Review Comment:
   warning: variable 'extracted_column' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       vectorized::MutableColumnPtr extracted_column = 0;
   ```
   



##########
be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp:
##########
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+
+#include "common/status.h"
+#include "io/io_common.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_object.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/json/path_in_data.h"
+
+namespace doris {
+namespace segment_v2 {
+
+Status HierarchicalDataReader::create(std::unique_ptr<ColumnIterator>* reader,
+                                      const SubcolumnColumnReaders::Node* node,
+                                      const SubcolumnColumnReaders::Node* root,
+                                      bool output_as_raw_json) {
+    // Non-leaf nodes need to be merged with the root
+    auto* stream_iter = new HierarchicalDataReader(node->path, 
output_as_raw_json);
+    std::vector<const SubcolumnColumnReaders::Node*> leaves;
+    vectorized::PathsInData leaves_paths;
+    SubcolumnColumnReaders::get_leaves_of_node(node, leaves, leaves_paths);
+    for (size_t i = 0; i < leaves_paths.size(); ++i) {
+        if (leaves_paths[i] == root->path) {
+            // use set_root to share instead
+            continue;
+        }
+        stream_iter->add_stream(leaves[i]);
+    }
+    // Make sure the root node is in stream_cache, so that a child can merge its data with root.
+    // E.g. {"a" : {"b" : {"c" : 1}}}: access the `a.b` path and merge with the root path so that
+    // the data can be fully merged, since some columns may not be extracted but remain in root,
+    // like {"a" : {"b" : {"e" : 1.1}}} in jsonb format
+    ColumnIterator* it;
+    RETURN_IF_ERROR(root->data.reader->new_iterator(&it));
+    stream_iter->set_root(std::make_unique<StreamReader>(
+            root->data.file_column_type->create_column(), 
std::unique_ptr<ColumnIterator>(it),
+            root->data.file_column_type));
+    reader->reset(stream_iter);
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->init(opts));
+        node.data.inited = true;
+        return Status::OK();
+    }));
+    if (_root_reader && !_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status HierarchicalDataReader::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(tranverse([&](SubstreamReaderTree::Node& node) {
+        RETURN_IF_ERROR(node.data.iterator->seek_to_ordinal(ord));
+        return Status::OK();
+    }));
+    if (_root_reader) {
+        DCHECK(_root_reader->inited);
+        RETURN_IF_ERROR(_root_reader->iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status HierarchicalDataReader::next_batch(size_t* n, 
vectorized::MutableColumnPtr& dst,
+                                          bool* has_null) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->next_batch(n, reader.column, 
has_null));
+                VLOG_DEBUG << fmt::format("{} next_batch {} rows, type={}", 
path.get_path(), *n,
+                                          type->get_name());
+                reader.rows_read += *n;
+                return Status::OK();
+            },
+            dst, *n);
+}
+
+Status HierarchicalDataReader::read_by_rowids(const rowid_t* rowids, const 
size_t count,
+                                              vectorized::MutableColumnPtr& 
dst) {
+    return process_read(
+            [&](StreamReader& reader, const vectorized::PathInData& path,
+                const vectorized::DataTypePtr& type) {
+                CHECK(reader.inited);
+                RETURN_IF_ERROR(reader.iterator->read_by_rowids(rowids, count, 
reader.column));
+                VLOG_DEBUG << fmt::format("{} read_by_rowids {} rows, 
type={}", path.get_path(),
+                                          count, type->get_name());
+                reader.rows_read += count;
+                return Status::OK();
+            },
+            dst, count);
+}
+
+Status HierarchicalDataReader::add_stream(const SubcolumnColumnReaders::Node* 
node) {
+    if (_substream_reader.find_leaf(node->path)) {
+        VLOG_DEBUG << "Already exist sub column " << node->path.get_path();
+        return Status::OK();
+    }
+    CHECK(node);
+    ColumnIterator* it;
+    RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
+    std::unique_ptr<ColumnIterator> it_ptr;
+    it_ptr.reset(it);
+    StreamReader reader(node->data.file_column_type->create_column(), 
std::move(it_ptr),
+                        node->data.file_column_type);
+    bool added = _substream_reader.add(node->path, std::move(reader));
+    if (!added) {
+        return Status::InternalError("Failed to add node path {}", 
node->path.get_path());
+    }
+    VLOG_DEBUG << fmt::format("Add substream {} for {}", 
node->path.get_path(), _path.get_path());
+    return Status::OK();
+}
+
+ordinal_t HierarchicalDataReader::get_current_ordinal() const {
+    return (*_substream_reader.begin())->data.iterator->get_current_ordinal();
+}
+
+Status ExtractReader::init(const ColumnIteratorOptions& opts) {
+    if (!_root_reader->inited) {
+        RETURN_IF_ERROR(_root_reader->iterator->init(opts));
+        _root_reader->inited = true;
+    }
+    return Status::OK();
+}
+
+Status ExtractReader::seek_to_first() {
+    LOG(FATAL) << "Not implemented";
+}
+
+Status ExtractReader::seek_to_ordinal(ordinal_t ord) {
+    CHECK(_root_reader->inited);
+    return _root_reader->iterator->seek_to_ordinal(ord);
+}
+
+Status ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t 
nrows) {
+    DCHECK(_root_reader);
+    DCHECK(_root_reader->inited);
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    if (dst->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+    }
+    auto& variant =
+            nullable_column == nullptr
+                    ? assert_cast<vectorized::ColumnObject&>(*dst)
+                    : 
assert_cast<vectorized::ColumnObject&>(nullable_column->get_nested_column());
+    const auto& root =
+            _root_reader->column->is_nullable()
+                    ? assert_cast<vectorized::ColumnObject&>(
+                              
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+                                      .get_nested_column())
+                    : assert_cast<const 
vectorized::ColumnObject&>(*_root_reader->column);
+    // extract root value with path, we can't modify the original root column
+    // since some other column may depend on it.
+    vectorized::MutableColumnPtr extracted_column;
+    RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
+            _col.path_info().pop_front(), extracted_column));
+    if (variant.empty() || variant.is_null_root()) {
+        variant.create_root(root.get_root_type(), std::move(extracted_column));
+    } else {
+        vectorized::ColumnPtr cast_column;

Review Comment:
   warning: variable 'cast_column' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
           vectorized::ColumnPtr cast_column = 0;
   ```
   



##########
be/src/vec/columns/column_nullable.h:
##########
@@ -391,6 +394,8 @@
 
     ColumnPtr index(const IColumn& indexes, size_t limit) const override;
 
+    bool is_predicate_column() const override { return 
nested_column->is_predicate_column(); }

Review Comment:
   warning: function 'is_predicate_column' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool is_predicate_column() const override { return 
nested_column->is_predicate_column(); }
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -58,127 +71,70 @@
 namespace doris::vectorized {
 namespace {
 
-DataTypePtr create_array_of_type(DataTypePtr type, size_t num_dimensions) {
+DataTypePtr create_array_of_type(DataTypePtr type, size_t num_dimensions, bool 
is_nullable) {
+    const DataTypeNullable* nullable = typeid_cast<const 
DataTypeNullable*>(type.get());
+    if ((nullable &&
+         typeid_cast<const 
ColumnObject::MostCommonType*>(nullable->get_nested_type().get())) ||
+        typeid_cast<const ColumnObject::MostCommonType*>(type.get())) {
+        // JSONB type MUST NOT be wrapped in an ARRAY column; it should be top level.
+        // So num_dimensions is ignored.
+        return type;
+    }
     for (size_t i = 0; i < num_dimensions; ++i) {
         type = std::make_shared<DataTypeArray>(std::move(type));
+        if (is_nullable) {
+            // wrap array with nullable
+            type = make_nullable(type);
+        }
     }
     return type;
 }
 
-DataTypePtr getBaseTypeOfArray(const DataTypePtr& type) {
+DataTypePtr get_base_type_of_array(const DataTypePtr& type) {
     /// Get raw pointers to avoid extra copying of type pointers.
     const DataTypeArray* last_array = nullptr;
     const auto* current_type = type.get();
+    if (const auto* nullable = typeid_cast<const 
DataTypeNullable*>(current_type)) {
+        current_type = nullable->get_nested_type().get();
+    }
     while (const auto* type_array = typeid_cast<const 
DataTypeArray*>(current_type)) {
         current_type = type_array->get_nested_type().get();
         last_array = type_array;
+        if (const auto* nullable = typeid_cast<const 
DataTypeNullable*>(current_type)) {
+            current_type = nullable->get_nested_type().get();
+        }
     }
     return last_array ? last_array->get_nested_type() : type;
 }
 
-size_t getNumberOfDimensions(const IDataType& type) {
-    if (const auto* type_array = typeid_cast<const DataTypeArray*>(&type)) {
-        return type_array->get_number_of_dimensions();
-    }
-    return 0;
-}
-
-DataTypePtr get_data_type_by_column(const IColumn& column) {
-    auto idx = column.get_data_type();
-    if (WhichDataType(idx).is_simple()) {
-        return DataTypeFactory::instance().create_data_type(idx);
-    }
-    if (WhichDataType(idx).is_nothing()) {
-        return std::make_shared<DataTypeNothing>();
-    }
-    if (const auto* column_array = check_and_get_column<ColumnArray>(&column)) 
{
-        return 
std::make_shared<DataTypeArray>(get_data_type_by_column(column_array->get_data()));
-    }
-    if (const auto* column_nullable = 
check_and_get_column<ColumnNullable>(&column)) {
-        return 
make_nullable(get_data_type_by_column(column_nullable->get_nested_column()));
-    }
-    // TODO add more types
-    assert(false);
-    return nullptr;
-}
-
-/// Recreates column with default scalar values and keeps sizes of arrays.
-ColumnPtr recreate_column_with_default_value(const ColumnPtr& column,
-                                             const DataTypePtr& scalar_type,
-                                             size_t num_dimensions) {
-    const auto* column_array = check_and_get_column<ColumnArray>(column.get());
-    if (column_array && num_dimensions) {
-        return ColumnArray::create(
-                
recreate_column_with_default_value(column_array->get_data_ptr(), scalar_type,
-                                                   num_dimensions - 1),
-                IColumn::mutate(column_array->get_offsets_ptr()));
-    }
-    return create_array_of_type(scalar_type, num_dimensions)
-            ->create_column()
-            ->clone_resized(column->size());
-}
-
-Array create_empty_array_field(size_t num_dimensions) {
-    assert(num_dimensions != 0);
-    Array array;
-    Array* current_array = &array;
-    for (size_t i = 1; i < num_dimensions; ++i) {
-        current_array->push_back(Array());
-        current_array = &current_array->back().get<Array&>();
-    }
-    return array;
-}
-
-/// Replaces NULL fields to given field or empty array.
-class FieldVisitorReplaceNull : public StaticVisitor<Field> {
-public:
-    explicit FieldVisitorReplaceNull(const Field& replacement_, size_t 
num_dimensions_)
-            : replacement(replacement_), num_dimensions(num_dimensions_) {}
-    Field operator()(const Null&) const {
-        return num_dimensions ? create_empty_array_field(num_dimensions) : 
replacement;
+size_t get_number_of_dimensions(const IDataType& type) {
+    int num_dimensions = 0;
+    const auto* current_type = &type;
+    if (const auto* nullable = typeid_cast<const 
DataTypeNullable*>(current_type)) {
+        current_type = nullable->get_nested_type().get();
     }
-    Field operator()(const Array& x) const {
-        assert(num_dimensions > 0);
-        const size_t size = x.size();
-        Array res(size);
-        for (size_t i = 0; i < size; ++i) {
-            res[i] = apply_visitor(FieldVisitorReplaceNull(replacement, 
num_dimensions - 1), x[i]);
+    while (const auto* type_array = typeid_cast<const 
DataTypeArray*>(current_type)) {
+        current_type = type_array->get_nested_type().get();
+        num_dimensions += 1;
+        if (const auto* nullable = typeid_cast<const 
DataTypeNullable*>(current_type)) {
+            current_type = nullable->get_nested_type().get();
         }
-        return res;
-    }
-    template <typename T>
-    Field operator()(const T& x) const {
-        return x;
     }
-
-private:
-    const Field& replacement;
-    size_t num_dimensions;
-};
+    return num_dimensions;
+}
 
 /// Calculates number of dimensions in array field.
 /// Returns 0 for scalar fields.
 class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t> {
 public:
     size_t operator()(const Array& x) const {
         const size_t size = x.size();
-        std::optional<size_t> dimensions;
+        size_t dimensions = 0;
         for (size_t i = 0; i < size; ++i) {
-            /// Do not count Nulls, because they will be replaced by default
-            /// values with proper number of dimensions.
-            if (x[i].is_null()) {
-                continue;
-            }
-            size_t current_dimensions = apply_visitor(*this, x[i]);
-            if (!dimensions) {
-                dimensions = current_dimensions;
-            } else if (current_dimensions != *dimensions) {
-                throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
-                                       "Number of dimensions mismatched among 
array elements");
-                return 0;
-            }
+            size_t element_dimensions = apply_visitor(*this, x[i]);

Review Comment:
   warning: variable 'element_dimensions' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
               size_t element_dimensions = apply_visitor(*this, x[i]);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -371,53 +335,120 @@
 }
 
 void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t 
start, size_t length) {
-    assert(src.is_finalized());
-    const auto& src_column = src.data.back();
-    const auto& src_type = src.least_common_type.get();
+    assert(start + length <= src.size());
+    size_t end = start + length;
+    // num_rows += length;
     if (data.empty()) {
-        add_new_column_part(src.least_common_type.get());
-        data.back()->insert_range_from(*src_column, start, length);
-    } else if (least_common_type.get()->equals(*src_type)) {
-        data.back()->insert_range_from(*src_column, start, length);
-    } else {
-        DataTypePtr new_least_common_type = nullptr;
-        get_least_supertype(DataTypes {least_common_type.get(), src_type}, 
&new_least_common_type,
-                            true /*compatible with string type*/);
-        ColumnPtr casted_column;
-        Status st = schema_util::cast_column({src_column, src_type, ""}, 
new_least_common_type,
-                                             &casted_column);
+        add_new_column_part(src.get_least_common_type());
+    } else if (!least_common_type.get()->equals(*src.get_least_common_type())) 
{
+        DataTypePtr new_least_common_type;
+        get_least_supertype<LeastSupertypeOnError::Jsonb>(
+                DataTypes {least_common_type.get(), 
src.get_least_common_type()},
+                &new_least_common_type);
+        if (!new_least_common_type->equals(*least_common_type.get())) {
+            add_new_column_part(std::move(new_least_common_type));
+        }
+    }
+    if (end <= src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(length);
+        return;
+    }
+    if (start < src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(src.num_of_defaults_in_prefix - 
start);
+    }
+    auto insert_from_part = [&](const auto& column, const auto& column_type, 
size_t from,
+                                size_t n) {
+        assert(from + n <= column->size());
+        if (column_type->equals(*least_common_type.get())) {
+            data.back()->insert_range_from(*column, from, n);
+            return;
+        }
+        /// If we need to insert a large range, it makes no sense to cut out part of the column
+        /// and cast it. Casting the whole column and inserting from it can be faster.
+        /// Threshold is just a guess.
+        if (n * 3 >= column->size()) {
+            ColumnPtr casted_column;
+            Status st = schema_util::cast_column({column, column_type, ""}, 
least_common_type.get(),
+                                                 &casted_column);
+            if (!st.ok()) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+                                       st.to_string() + ", real_code:{}", 
st.code());
+            }
+            data.back()->insert_range_from(*casted_column, from, n);
+            return;
+        }
+        auto casted_column = column->cut(from, n);
+        Status st = schema_util::cast_column({casted_column, column_type, ""},
+                                             least_common_type.get(), 
&casted_column);
         if (!st.ok()) {
             throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() 
+ ", real_code:{}",
                                    st.code());
         }
-        if (!least_common_type.get()->equals(*new_least_common_type)) {
-            add_new_column_part(std::move(new_least_common_type));
-        }
-        data.back()->insert_range_from(*casted_column, start, length);
+        data.back()->insert_range_from(*casted_column, 0, n);
+    };
+    size_t pos = 0;
+    size_t processed_rows = src.num_of_defaults_in_prefix;
+    /// Find the first part of the column that intersects the range.
+    while (pos < src.data.size() && processed_rows + src.data[pos]->size() < 
start) {
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the first part of column.
+    if (pos < src.data.size() && processed_rows < start) {
+        size_t part_start = start - processed_rows;
+        size_t part_length = std::min(src.data[pos]->size() - part_start, end 
- start);
+        insert_from_part(src.data[pos], src.data_types[pos], part_start, 
part_length);
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the parts of column in the middle of range.
+    while (pos < src.data.size() && processed_rows + src.data[pos]->size() < 
end) {
+        insert_from_part(src.data[pos], src.data_types[pos], 0, 
src.data[pos]->size());
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the last part of column if needed.
+    if (pos < src.data.size() && processed_rows < end) {
+        size_t part_end = end - processed_rows;
+        insert_from_part(src.data[pos], src.data_types[pos], 0, part_end);
     }
 }
 
 bool ColumnObject::Subcolumn::is_finalized() const {
-    return data.empty() || (data.size() == 1 && num_of_defaults_in_prefix == 
0);
+    return num_of_defaults_in_prefix == 0 && (data.empty() || (data.size() == 
1));
 }
 
 template <typename Func>
-ColumnPtr ColumnObject::apply_for_subcolumns(Func&& func, std::string_view 
func_name) const {
+MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&& func) const {
     if (!is_finalized()) {
-        // LOG(FATAL) << "Cannot " << func_name << " non-finalized 
ColumnObject";
-        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
-                               "Cannot {} non-finalized ColumnObject", 
func_name);
+        auto finalized = clone_finalized();
+        auto& finalized_object = assert_cast<ColumnObject&>(*finalized);
+        return finalized_object.apply_for_subcolumns(std::forward<Func>(func));
     }
-    auto res = ColumnObject::create(is_nullable);
+    auto res = ColumnObject::create(is_nullable, false);
     for (const auto& subcolumn : subcolumns) {
         auto new_subcolumn = func(subcolumn->data.get_finalized_column());
-        res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable());
+        res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
+                            subcolumn->data.get_least_common_type());
     }
     return res;
 }
 ColumnPtr ColumnObject::index(const IColumn& indexes, size_t limit) const {
     return apply_for_subcolumns(
-            [&](const auto& subcolumn) { return subcolumn.index(indexes, 
limit); }, "index");
+            [&](const auto& subcolumn) { return subcolumn.index(indexes, 
limit); });
+}
+
+bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {
+    if (num_rows < config::threshold_rows_to_estimate_sparse_column) {
+        return false;
+    }
+    std::vector<double> defaults_ratio;
+    for (size_t i = 0; i < data.size(); ++i) {
+        defaults_ratio.push_back(data[i]->get_ratio_of_default_rows());
+    }
+    double default_ratio = std::accumulate(defaults_ratio.begin(), 
defaults_ratio.end(), 0.0) /

Review Comment:
   warning: variable 'default_ratio' is not initialized 
[cppcoreguidelines-init-variables]
   
   be/src/vec/columns/column_object.cpp:25:
   ```diff
   - #include <parallel_hashmap/phmap.h>
   + #include <math.h>
   + #include <parallel_hashmap/phmap.h>
   ```
   
   ```suggestion
       double default_ratio = std::accumulate(defaults_ratio.begin(), 
defaults_ratio.end(), 0.0) /
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -371,53 +335,120 @@
 }
 
 void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t 
start, size_t length) {
-    assert(src.is_finalized());
-    const auto& src_column = src.data.back();
-    const auto& src_type = src.least_common_type.get();
+    assert(start + length <= src.size());
+    size_t end = start + length;
+    // num_rows += length;
     if (data.empty()) {
-        add_new_column_part(src.least_common_type.get());
-        data.back()->insert_range_from(*src_column, start, length);
-    } else if (least_common_type.get()->equals(*src_type)) {
-        data.back()->insert_range_from(*src_column, start, length);
-    } else {
-        DataTypePtr new_least_common_type = nullptr;
-        get_least_supertype(DataTypes {least_common_type.get(), src_type}, 
&new_least_common_type,
-                            true /*compatible with string type*/);
-        ColumnPtr casted_column;
-        Status st = schema_util::cast_column({src_column, src_type, ""}, 
new_least_common_type,
-                                             &casted_column);
+        add_new_column_part(src.get_least_common_type());
+    } else if (!least_common_type.get()->equals(*src.get_least_common_type())) 
{
+        DataTypePtr new_least_common_type;
+        get_least_supertype<LeastSupertypeOnError::Jsonb>(
+                DataTypes {least_common_type.get(), 
src.get_least_common_type()},
+                &new_least_common_type);
+        if (!new_least_common_type->equals(*least_common_type.get())) {
+            add_new_column_part(std::move(new_least_common_type));
+        }
+    }
+    if (end <= src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(length);
+        return;
+    }
+    if (start < src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(src.num_of_defaults_in_prefix - 
start);
+    }
+    auto insert_from_part = [&](const auto& column, const auto& column_type, 
size_t from,
+                                size_t n) {
+        assert(from + n <= column->size());
+        if (column_type->equals(*least_common_type.get())) {
+            data.back()->insert_range_from(*column, from, n);
+            return;
+        }
+        /// If we need to insert large range, there is no sense to cut part of 
column and cast it.
+        /// Casting of all column and inserting from it can be faster.
+        /// Threshold is just a guess.
+        if (n * 3 >= column->size()) {
+            ColumnPtr casted_column;
+            Status st = schema_util::cast_column({column, column_type, ""}, 
least_common_type.get(),
+                                                 &casted_column);
+            if (!st.ok()) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+                                       st.to_string() + ", real_code:{}", 
st.code());
+            }
+            data.back()->insert_range_from(*casted_column, from, n);
+            return;
+        }
+        auto casted_column = column->cut(from, n);
+        Status st = schema_util::cast_column({casted_column, column_type, ""},
+                                             least_common_type.get(), 
&casted_column);
         if (!st.ok()) {
             throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() 
+ ", real_code:{}",
                                    st.code());
         }
-        if (!least_common_type.get()->equals(*new_least_common_type)) {
-            add_new_column_part(std::move(new_least_common_type));
-        }
-        data.back()->insert_range_from(*casted_column, start, length);
+        data.back()->insert_range_from(*casted_column, 0, n);
+    };
+    size_t pos = 0;
+    size_t processed_rows = src.num_of_defaults_in_prefix;
+    /// Find the first part of the column that intersects the range.
+    while (pos < src.data.size() && processed_rows + src.data[pos]->size() < 
start) {
+        processed_rows += src.data[pos]->size();
+        ++pos;
+    }
+    /// Insert from the first part of column.
+    if (pos < src.data.size() && processed_rows < start) {
+        size_t part_start = start - processed_rows;
+        size_t part_length = std::min(src.data[pos]->size() - part_start, end 
- start);

Review Comment:
   warning: variable 'part_length' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
           size_t part_length = std::min(src.data[pos]->size() - part_start, end - start);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;

Review Comment:
   warning: variable 'buffer' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
           rapidjson::StringBuffer buffer;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);

Review Comment:
   warning: variable 'res' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;

Review Comment:
   warning: variable 'buffer' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::StringBuffer buffer;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;

Review Comment:
   warning: variable 'buf' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::StringBuffer buf;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to elimate conflict 
at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with it's path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : "d" {"n" : null}, "e" : null}, "c" : 10 }
+// after compact -> {"a" : {"c"} : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {
+    CHECK(is_finalized());
+    if (sparse_columns.empty()) {
+        return;
+    }
+    ColumnPtr src = 
subcolumns.get_mutable_root()->data.get_finalized_column_ptr();
+    MutableColumnPtr mresult = src->clone_empty();

Review Comment:
   warning: variable 'mresult' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       MutableColumnPtr mresult = src->clone_empty();
   ```
   



##########
be/src/vec/columns/column_nullable.h:
##########
@@ -366,6 +365,10 @@
         return get_ptr();
     }
 
+    double get_ratio_of_default_rows(double sample_ratio) const override {

Review Comment:
   warning: function 'get_ratio_of_default_rows' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] double get_ratio_of_default_rows(double sample_ratio) 
const override {
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;

Review Comment:
   warning: variable 'current_key' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       std::string_view current_key = path.get_parts()[idx].key;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -493,7 +523,9 @@
             n -= column->size();
         }
     }
-    data.resize(data.size() - num_removed);
+    size_t sz = data.size() - num_removed;

Review Comment:
   warning: variable 'sz' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       size_t sz = data.size() - num_removed;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -371,53 +335,120 @@
 }
 
 void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn& src, size_t 
start, size_t length) {
-    assert(src.is_finalized());
-    const auto& src_column = src.data.back();
-    const auto& src_type = src.least_common_type.get();
+    assert(start + length <= src.size());
+    size_t end = start + length;
+    // num_rows += length;
     if (data.empty()) {
-        add_new_column_part(src.least_common_type.get());
-        data.back()->insert_range_from(*src_column, start, length);
-    } else if (least_common_type.get()->equals(*src_type)) {
-        data.back()->insert_range_from(*src_column, start, length);
-    } else {
-        DataTypePtr new_least_common_type = nullptr;
-        get_least_supertype(DataTypes {least_common_type.get(), src_type}, 
&new_least_common_type,
-                            true /*compatible with string type*/);
-        ColumnPtr casted_column;
-        Status st = schema_util::cast_column({src_column, src_type, ""}, 
new_least_common_type,
-                                             &casted_column);
+        add_new_column_part(src.get_least_common_type());
+    } else if (!least_common_type.get()->equals(*src.get_least_common_type())) 
{
+        DataTypePtr new_least_common_type;
+        get_least_supertype<LeastSupertypeOnError::Jsonb>(
+                DataTypes {least_common_type.get(), 
src.get_least_common_type()},
+                &new_least_common_type);
+        if (!new_least_common_type->equals(*least_common_type.get())) {
+            add_new_column_part(std::move(new_least_common_type));
+        }
+    }
+    if (end <= src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(length);
+        return;
+    }
+    if (start < src.num_of_defaults_in_prefix) {
+        data.back()->insert_many_defaults(src.num_of_defaults_in_prefix - 
start);
+    }
+    auto insert_from_part = [&](const auto& column, const auto& column_type, 
size_t from,
+                                size_t n) {
+        assert(from + n <= column->size());
+        if (column_type->equals(*least_common_type.get())) {
+            data.back()->insert_range_from(*column, from, n);
+            return;
+        }
+        /// If we need to insert large range, there is no sense to cut part of 
column and cast it.
+        /// Casting of all column and inserting from it can be faster.
+        /// Threshold is just a guess.
+        if (n * 3 >= column->size()) {
+            ColumnPtr casted_column;

Review Comment:
   warning: variable 'casted_column' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
               ColumnPtr casted_column = nullptr;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -768,9 +820,27 @@
     return subcolumns.find_leaf(key) != nullptr;
 }
 
-bool ColumnObject::add_sub_column(const PathInData& key, MutableColumnPtr&& 
subcolumn) {
+bool ColumnObject::add_sub_column(const PathInData& key, MutableColumnPtr&& 
subcolumn,
+                                  DataTypePtr type) {
     size_t new_size = subcolumn->size();
-    bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), 
is_nullable));
+    doc_structure = nullptr;
+    if (key.empty() && subcolumns.empty()) {
+        // create root
+        subcolumns.create_root(Subcolumn(std::move(subcolumn), type, 
is_nullable, true));
+        num_rows = new_size;
+        return true;
+    }
+    if (key.empty() && ((!subcolumns.get_root()->is_scalar()) ||
+                        
is_nothing(subcolumns.get_root()->data.get_least_common_type()))) {
+        // update root
+        subcolumns.get_mutable_root()->modify_to_scalar(
+                Subcolumn(std::move(subcolumn), type, is_nullable, true));
+        if (num_rows == 0) {
+            num_rows = new_size;
+        }
+        return true;
+    }
+    bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), type, 
is_nullable));

Review Comment:
   warning: variable 'inserted' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       bool inserted = subcolumns.add(key, Subcolumn(std::move(subcolumn), type, is_nullable));
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"n" : null}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);

Review Comment:
   warning: variable 'res' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);

Review Comment:
   warning: variable 'target' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::Value* target = find_leaf_node_by_path(root, path);
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"n" : null}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();

Review Comment:
   warning: variable 'it' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::Value::MemberIterator it = json.MemberBegin();
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"n" : null}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {
+    CHECK(is_finalized());
+    if (sparse_columns.empty()) {
+        return;
+    }
+    ColumnPtr src = 
subcolumns.get_mutable_root()->data.get_finalized_column_ptr();

Review Comment:
   warning: variable 'src' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       ColumnPtr src = subcolumns.get_mutable_root()->data.get_finalized_column_ptr();
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"n" : null}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;

Review Comment:
   warning: variable 'buf' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::StringBuffer buf;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"d" : {"n" : null}}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {
+    CHECK(is_finalized());
+    if (sparse_columns.empty()) {
+        return;
+    }
+    ColumnPtr src = 
subcolumns.get_mutable_root()->data.get_finalized_column_ptr();
+    MutableColumnPtr mresult = src->clone_empty();
+    const ColumnNullable* src_null = assert_cast<const 
ColumnNullable*>(src.get());
+    const ColumnString* src_column_ptr =
+            assert_cast<const ColumnString*>(&src_null->get_nested_column());
+    rapidjson::StringBuffer buffer;

Review Comment:
   warning: variable 'buffer' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       rapidjson::StringBuffer buffer = 0;
   ```
   



##########
be/src/vec/columns/column_object.cpp:
##########
@@ -857,28 +908,317 @@
                        [](const auto& entry) { return 
entry->data.is_finalized(); });
 }
 
-void ColumnObject::finalize() {
+void ColumnObject::Subcolumn::wrapp_array_nullable() {
+    // Wrap array with nullable, treat empty array as null to eliminate conflict at present
+    auto& result_column = get_finalized_column_ptr();
+    if (result_column->is_column_array() && !result_column->is_nullable()) {
+        auto new_null_map = ColumnUInt8::create();
+        new_null_map->reserve(result_column->size());
+        auto& null_map_data = new_null_map->get_data();
+        auto array = static_cast<const ColumnArray*>(result_column.get());
+        for (size_t i = 0; i < array->size(); ++i) {
+            null_map_data.push_back(array->is_default_at(i));
+        }
+        result_column = ColumnNullable::create(std::move(result_column), 
std::move(new_null_map));
+        data_types[0] = make_nullable(data_types[0]);
+        least_common_type = LeastCommonType {data_types[0]};
+    }
+}
+
+rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const 
PathInData& path,
+                                         int idx = 0) {
+    if (idx >= path.get_parts().size()) {
+        return &json;
+    }
+
+    std::string_view current_key = path.get_parts()[idx].key;
+    if (!json.IsObject()) {
+        return nullptr;
+    }
+    rapidjson::Value name(current_key.data(), current_key.size());
+    auto it = json.FindMember(name);
+    if (it == json.MemberEnd()) {
+        return nullptr;
+    }
+    rapidjson::Value& current = it->value;
+    // if (idx == path.get_parts().size() - 1) {
+    //     return &current;
+    // }
+    return find_leaf_node_by_path(current, path, idx + 1);
+}
+
+void find_and_set_leave_value(const IColumn* column, const PathInData& path,
+                              const DataTypeSerDeSPtr& type, rapidjson::Value& 
root,
+                              rapidjson::Document::AllocatorType& allocator, 
int row) {
+    const auto* nullable = assert_cast<const ColumnNullable*>(column);
+    if (nullable->is_null_at(row)) {
+        return;
+    }
+    // TODO could cache the result of leaf nodes with its path info
+    rapidjson::Value* target = find_leaf_node_by_path(root, path);
+    if (UNLIKELY(!target)) {
+        rapidjson::StringBuffer buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+        root.Accept(writer);
+        LOG(FATAL) << "could not find path " << path.get_path()
+                   << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
+    }
+    type->write_one_cell_to_json(*column, *target, allocator, row);
+}
+
+// compact null values
+// {"a" : {"b" : {"d" : {"n" : null}}, "e" : null}, "c" : 10}
+// after compact -> {"c" : 10}
+void compact_null_values(rapidjson::Value& json, 
rapidjson::Document::AllocatorType& allocator) {
+    if (!json.IsObject() || json.IsNull()) {
+        return;
+    }
+
+    rapidjson::Value::MemberIterator it = json.MemberBegin();
+    while (it != json.MemberEnd()) {
+        rapidjson::Value& value = it->value;
+        if (value.IsNull()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        compact_null_values(value, allocator);
+        if (value.IsObject() && value.ObjectEmpty()) {
+            it = json.EraseMember(it);
+            continue;
+        }
+        ++it;
+    }
+}
+
+// Construct rapidjson value from Subcolumns
+void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::AllocatorType& allocator,
+                             const ColumnObject::Subcolumns::Node* node_root) {
+    if (node_root == nullptr || node_root->children.empty()) {
+        root.SetNull();
+        return;
+    }
+    root.SetObject();
+    for (auto it = node_root->children.begin(); it != 
node_root->children.end(); ++it) {
+        auto child = it->get_second();
+        rapidjson::Value value(rapidjson::kObjectType);
+        get_json_by_column_tree(value, allocator, child.get());
+        root.AddMember(rapidjson::StringRef(it->get_first().data, 
it->get_first().size), value,
+                       allocator);
+    }
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, std::string* output) 
const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    rapidjson::StringBuffer buf;
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        *output = type->to_string(*get_root(), row);
+        return true;
+    }
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        // TODO avoid copy
+        *output = std::string(buf.GetString(), buf.GetSize());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_string(int row, BufferWritable& 
output) const {
+    if (!is_finalized()) {
+        const_cast<ColumnObject*>(this)->finalize();
+    }
+    if (is_scalar_variant()) {
+        auto type = get_root_type();
+        type->to_string(*get_root(), row, output);
+        return true;
+    }
+    rapidjson::StringBuffer buf;
+    bool res = serialize_one_row_to_json_format(row, &buf, nullptr);
+    if (res) {
+        output.write(buf.GetString(), buf.GetLength());
+    }
+    return res;
+}
+
+bool ColumnObject::serialize_one_row_to_json_format(int row, 
rapidjson::StringBuffer* output,
+                                                    bool* is_null) const {
+    CHECK(is_finalized());
+    if (subcolumns.empty()) {
+        if (is_null != nullptr) {
+            *is_null = true;
+        } else {
+            rapidjson::Value root(rapidjson::kNullType);
+            rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+            return root.Accept(writer);
+        }
+        return true;
+    }
+    CHECK(size() > row);
+    rapidjson::StringBuffer buffer;
+    rapidjson::Value root(rapidjson::kNullType);
+    if (doc_structure == nullptr) {
+        doc_structure = std::make_shared<rapidjson::Document>();
+        rapidjson::Document::AllocatorType& allocator = 
doc_structure->GetAllocator();
+        get_json_by_column_tree(*doc_structure, allocator, 
subcolumns.get_root());
+    }
+    if (!doc_structure->IsNull()) {
+        root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
+    }
+#ifndef NDEBUG
+    VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
+#endif
+    for (const auto& subcolumn : subcolumns) {
+        find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), 
subcolumn->path,
+                                 
subcolumn->data.get_least_common_type_serde(), root,
+                                 doc_structure->GetAllocator(), row);
+    }
+    compact_null_values(root, doc_structure->GetAllocator());
+    if (root.IsNull() && is_null != nullptr) {
+        // Fast path
+        *is_null = true;
+    } else {
+        output->Clear();
+        rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
+        return root.Accept(writer);
+    }
+    return true;
+}
+
+void ColumnObject::merge_sparse_to_root_column() {

Review Comment:
   warning: method 'merge_sparse_to_root_column' can be made const 
[readability-make-member-function-const]
   
   be/src/vec/columns/column_object.h:252:
   ```diff
   -     void merge_sparse_to_root_column();
   +     void merge_sparse_to_root_column() const;
   ```
   
   ```suggestion
   void ColumnObject::merge_sparse_to_root_column() const {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to