This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch array-type
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 458bf2e3671d7d110adda9a66cd7898ff9cefa4c
Author: Adonis Ling <[email protected]>
AuthorDate: Sun Mar 20 23:10:51 2022 +0800

    [feature-wip](array-type) Support insertion for vectorized engine. (#8494)
---
 be/src/olap/row_block2.cpp                         | 107 +++++++----
 be/src/olap/row_block2.h                           |   3 +-
 be/src/vec/CMakeLists.txt                          |   1 +
 be/src/vec/columns/column_array.cpp                |   3 +-
 be/src/vec/columns/column_array.h                  |   4 +
 be/src/vec/core/block.cpp                          | 145 ++++++++++-----
 be/src/vec/core/block.h                            |  48 +++--
 be/src/vec/data_types/data_type_factory.cpp        | 114 ++++++------
 be/src/vec/data_types/data_type_factory.hpp        |  49 +++--
 .../vec/exprs/{vliteral.h => varray_literal.cpp}   |  39 ++--
 be/src/vec/exprs/{vliteral.h => varray_literal.h}  |  23 +--
 be/src/vec/exprs/vexpr.cpp                         |   9 +-
 be/src/vec/exprs/vliteral.cpp                      | 183 +++++++++---------
 be/src/vec/exprs/vliteral.h                        |  14 +-
 be/src/vec/sink/mysql_result_writer.cpp            |  54 +++++-
 be/test/runtime/array_test.cpp                     | 207 +++++++++------------
 be/test/test_util/CMakeLists.txt                   |   2 +
 be/test/test_util/array_utils.cpp                  |  72 +++++++
 .../vliteral.h => test/test_util/array_utils.h}    |  32 ++--
 be/test/util/array_parser_test.cpp                 | 131 ++++++-------
 20 files changed, 705 insertions(+), 535 deletions(-)

diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index b6a2a0b..aa13c1f 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -92,7 +92,8 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, 
RowBlock* dst) {
     return Status::OK();
 }
 
-Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColumnPtr& origin_column) {
+Status RowBlockV2::_copy_data_to_column(int cid,
+                                        doris::vectorized::MutableColumnPtr& 
origin_column) {
     auto* column = origin_column.get();
     bool nullable_mark_array[_selected_size];
 
@@ -170,7 +171,7 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
                 }
             }
         }
-        break;        
+        break;
     }
     case OLAP_FIELD_TYPE_MAP:
     case OLAP_FIELD_TYPE_VARCHAR: {
@@ -197,7 +198,8 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
                 if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
                     column_string->insert_data(slice->data, slice->size);
                 } else {
-                    return Status::NotSupported("Not support string len over 
than 1MB in vec engine.");
+                    return Status::NotSupported(
+                            "Not support string len over than 1MB in vec 
engine.");
                 }
             } else {
                 column_string->insert_default();
@@ -291,9 +293,12 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
             auto cv = reinterpret_cast<const 
CollectionValue*>(column_block(cid).cell_ptr(row_idx));
             if (!nullable_mark_array[j]) {
                 offset += cv->length();
-                _append_data_to_column(src_col->elements(), 
src_col->item_offset(row_idx), cv->length(), nested_col);
+                _append_data_to_column(src_col->elements(), 
src_col->item_offset(row_idx),
+                                       cv->length(), nested_col);
+                offsets_col.push_back(offset);
+            } else {
+                column_array->insert_default();
             }
-            offsets_col.emplace_back(offset);
         }
         break;
     }
@@ -345,11 +350,13 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
     return Status::OK();
 }
 
-Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, 
uint16_t off, uint16_t len, doris::vectorized::MutableColumnPtr& origin_column) 
{
+Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, 
size_t start,
+                                          uint32_t len,
+                                          doris::vectorized::MutableColumnPtr& 
origin_column) {
     constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024;
 
     auto* column = origin_column.get();
-    uint16_t selected_size = len;
+    uint32_t selected_size = len;
     bool nullable_mark_array[selected_size];
 
     bool column_nullable = origin_column->is_nullable();
@@ -360,8 +367,8 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
         column = nullable_column->get_nested_column_ptr().get();
 
         if (origin_nullable) {
-            for (uint16_t i = 0; i < selected_size; ++i) {
-                uint16_t row_idx = i + off;
+            for (uint32_t i = 0; i < selected_size; ++i) {
+                uint32_t row_idx = i + start;
                 null_map.push_back(batch->is_null_at(row_idx));
                 nullable_mark_array[i] = null_map.back();
             }
@@ -373,12 +380,12 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
         memset(nullable_mark_array, false, selected_size * sizeof(bool));
     }
 
-    auto insert_data_directly = [&nullable_mark_array](auto& batch, auto& 
column, auto& off, auto& len) {
-        for (uint16_t j = 0; j < len; ++j) {
+    auto insert_data_directly = [&nullable_mark_array](auto& batch, auto& 
column, auto& start,
+                                                       auto& length) {
+        for (uint32_t j = 0; j < length; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
-                column->insert_data(
-                        reinterpret_cast<const 
char*>(batch->cell_ptr(row_idx)), 0);
+                uint32_t row_idx = j + start;
+                column->insert_data(reinterpret_cast<const 
char*>(batch->cell_ptr(row_idx)), 0);
             } else {
                 column->insert_default();
             }
@@ -388,10 +395,10 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     switch (batch->type_info()->type()) {
     case OLAP_FIELD_TYPE_OBJECT: {
         auto column_bitmap = assert_cast<vectorized::ColumnBitmap*>(column);
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             column_bitmap->insert_default();
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto slice = reinterpret_cast<const 
Slice*>(batch->cell_ptr(row_idx));
 
                 BitmapValue* pvalue = 
&column_bitmap->get_element(column_bitmap->size() - 1);
@@ -409,10 +416,10 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     }
     case OLAP_FIELD_TYPE_HLL: {
         auto column_hll = assert_cast<vectorized::ColumnHLL*>(column);
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             column_hll->insert_default();
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto slice = reinterpret_cast<const 
Slice*>(batch->cell_ptr(row_idx));
 
                 HyperLogLog* pvalue = 
&column_hll->get_element(column_hll->size() - 1);
@@ -432,9 +439,9 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     case OLAP_FIELD_TYPE_VARCHAR: {
         auto column_string = assert_cast<vectorized::ColumnString*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto slice = reinterpret_cast<const 
Slice*>(batch->cell_ptr(row_idx));
                 column_string->insert_data(slice->data, slice->size);
             } else {
@@ -446,14 +453,15 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     case OLAP_FIELD_TYPE_STRING: {
         auto column_string = assert_cast<vectorized::ColumnString*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto slice = reinterpret_cast<const 
Slice*>(batch->cell_ptr(row_idx));
                 if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
                     column_string->insert_data(slice->data, slice->size);
                 } else {
-                    return Status::NotSupported("Not support string len over 
than 1MB in vec engine.");
+                    return Status::NotSupported(
+                            "Not support string len over than 1MB in vec 
engine.");
                 }
             } else {
                 column_string->insert_default();
@@ -464,9 +472,9 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     case OLAP_FIELD_TYPE_CHAR: {
         auto column_string = assert_cast<vectorized::ColumnString*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto slice = reinterpret_cast<const 
Slice*>(batch->cell_ptr(row_idx));
                 column_string->insert_data(slice->data, strnlen(slice->data, 
slice->size));
             } else {
@@ -478,9 +486,9 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     case OLAP_FIELD_TYPE_DATE: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto ptr = reinterpret_cast<const 
char*>(batch->cell_ptr(row_idx));
 
                 uint64_t value = 0;
@@ -500,9 +508,9 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
     case OLAP_FIELD_TYPE_DATETIME: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto ptr = reinterpret_cast<const 
char*>(batch->cell_ptr(row_idx));
 
                 uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
@@ -518,9 +526,9 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
         auto column_decimal =
                 
assert_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(column);
 
-        for (uint16_t j = 0; j < selected_size; ++j) {
+        for (uint32_t j = 0; j < selected_size; ++j) {
             if (!nullable_mark_array[j]) {
-                uint16_t row_idx = j + off;
+                uint32_t row_idx = j + start;
                 auto ptr = reinterpret_cast<const 
char*>(batch->cell_ptr(row_idx));
 
                 int64_t int_value = *(int64_t*)(ptr);
@@ -533,44 +541,65 @@ Status RowBlockV2::_append_data_to_column(const 
ColumnVectorBatch* batch, uint16
         }
         break;
     }
+    case OLAP_FIELD_TYPE_ARRAY: {
+        auto array_batch = reinterpret_cast<const 
ArrayColumnVectorBatch*>(batch);
+        auto column_array = assert_cast<vectorized::ColumnArray*>(column);
+        auto nested_col = (*column_array->get_data_ptr()).assume_mutable();
+
+        auto& offsets_col = column_array->get_offsets();
+        uint32_t offset = offsets_col.back();
+        for (uint32_t j = 0; j < selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint32_t row_idx = j + start;
+                auto cv = reinterpret_cast<const 
CollectionValue*>(batch->cell_ptr(row_idx));
+                offset += cv->length();
+                _append_data_to_column(array_batch->elements(), 
array_batch->item_offset(row_idx),
+                                       cv->length(), nested_col);
+                offsets_col.push_back(offset);
+            } else {
+                column_array->insert_default();
+            }
+        }
+        break;
+    }
     case OLAP_FIELD_TYPE_INT: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int32>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_BOOL: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::UInt8>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_TINYINT: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int8>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_SMALLINT: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int16>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_BIGINT: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_LARGEINT: {
         auto column_int = 
assert_cast<vectorized::ColumnVector<vectorized::Int128>*>(column);
-        insert_data_directly(batch, column_int, off, len);
+        insert_data_directly(batch, column_int, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_FLOAT: {
         auto column_float = 
assert_cast<vectorized::ColumnVector<vectorized::Float32>*>(column);
-        insert_data_directly(batch, column_float, off, len);
+        insert_data_directly(batch, column_float, start, len);
         break;
     }
     case OLAP_FIELD_TYPE_DOUBLE: {
         auto column_float = 
assert_cast<vectorized::ColumnVector<vectorized::Float64>*>(column);
-        insert_data_directly(batch, column_float, off, len);
+        insert_data_directly(batch, column_float, start, len);
         break;
     }
     default: {
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index f2e9e5b..fda9a49 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -110,7 +110,8 @@ public:
 
 private:
     Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& 
mutable_column_ptr);
-    Status _append_data_to_column(const ColumnVectorBatch* batch, uint16_t 
off, uint16_t len, vectorized::MutableColumnPtr& mutable_column_ptr);
+    Status _append_data_to_column(const ColumnVectorBatch* batch, size_t 
start, uint32_t len,
+                                  vectorized::MutableColumnPtr& 
mutable_column_ptr);
 
     const Schema& _schema;
     size_t _capacity;
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 74dca7e..d492417 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -102,6 +102,7 @@ set(VEC_FILES
   exprs/vexpr.cpp
   exprs/vexpr_context.cpp
   exprs/vliteral.cpp
+  exprs/varray_literal.cpp
   exprs/vin_predicate.cpp
   exprs/vtuple_is_null_predicate.cpp
   exprs/vslot_ref.cpp
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index be611af..4754ca3 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -311,7 +311,8 @@ void ColumnArray::insert_range_from(const IColumn & src, 
size_t start, size_t le
         cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
     } else {
         size_t old_size = cur_offsets.size();
-        size_t prev_max_offset = old_size ? cur_offsets.back() : 0;
+        // back() on an empty array reads index -1; that is OK because PaddedPODArray pads zeros on the left.
+        size_t prev_max_offset = cur_offsets.back();
         cur_offsets.resize(old_size + length);
 
         for (size_t i = 0; i < length; ++i)
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index f3b8b55..e6567d1 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -144,6 +144,10 @@ public:
     void replace_column_data_default(size_t self_row = 0) override {
         LOG(FATAL) << "replace_column_data_default not implemented";
     }
+    void clear() override {
+        data->clear();
+        offsets->clear();
+    }
 
 private:
     WrappedPtr data;
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 831e9f7..0cec8ab 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -32,6 +32,7 @@
 #include "runtime/row_batch.h"
 #include "runtime/tuple.h"
 #include "runtime/tuple_row.h"
+#include "udf/udf.h"
 #include "vec/columns/column_const.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_vector.h"
@@ -701,63 +702,119 @@ doris::Tuple* Block::deep_copy_tuple(const 
doris::TupleDescriptor& desc, MemPool
 
     for (int i = 0; i < desc.slots().size(); ++i) {
         auto slot_desc = desc.slots()[i];
-        auto data_ref = get_by_position(column_offset + 
i).column->get_data_at(row);
-
-        if (data_ref.data == nullptr) {
+        auto& type_desc = slot_desc->type();
+        const auto& column = get_by_position(column_offset + i).column;
+        const auto& data_ref =
+                type_desc.type != TYPE_ARRAY ? column->get_data_at(row) : 
StringRef();
+        bool is_null = is_column_data_null(slot_desc->type(), data_ref, 
column, row);
+        if (is_null) {
             dst->set_null(slot_desc->null_indicator_offset());
-            continue;
         } else {
             dst->set_not_null(slot_desc->null_indicator_offset());
         }
+        deep_copy_slot(dst->get_slot(slot_desc->tuple_offset()), pool, 
type_desc, data_ref,
+                       column.get(), row, padding_char);
+    }
+    return dst;
+}
+
+inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc,
+                                       const StringRef& data_ref, const 
IColumn* column, int row) {
+    if (type_desc.type != TYPE_ARRAY) {
+        return data_ref.data == nullptr;
+    } else {
+        Field array;
+        column->get(row, array);
+        return array.is_null();
+    }
+}
+
+// TODO: refactor this function; it is too long.
+void Block::deep_copy_slot(void* dst, MemPool* pool, const 
doris::TypeDescriptor& type_desc,
+                           const StringRef& data_ref, const IColumn* column, 
int row,
+                           bool padding_char) {
+    if (type_desc.is_collection_type()) {
+        if (type_desc.type != TYPE_ARRAY) {
+            return;
+        }
 
-        if (!slot_desc->type().is_string_type() && 
!slot_desc->type().is_date_type()) {
-            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), 
data_ref.data, data_ref.size);
-        } else if (slot_desc->type().is_string_type() && slot_desc->type() != 
TYPE_OBJECT &&
-                   slot_desc->type() != TYPE_HLL) {
-            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const 
void*)(&data_ref),
-                   sizeof(data_ref));
-            // Copy the content of string
-            if (padding_char && slot_desc->type() == TYPE_CHAR) {
-                // serialize the content of string
-                auto string_slot = 
dst->get_string_slot(slot_desc->tuple_offset());
-                string_slot->ptr = 
reinterpret_cast<char*>(pool->allocate(slot_desc->type().len));
-                string_slot->len = slot_desc->type().len;
-                memset(string_slot->ptr, 0, slot_desc->type().len);
-                memcpy(string_slot->ptr, data_ref.data, data_ref.size);
+        Field field;
+        column->get(row, field);
+        const auto& array = field.get<Array>();
+        auto collection_value = reinterpret_cast<CollectionValue*>(dst);
+        auto item_type_desc = type_desc.children.front();
+        CollectionValue::init_collection(pool, array.size(), 
item_type_desc.type, collection_value);
+
+        const ColumnArray* array_column = nullptr;
+        if (is_column_nullable(*column)) {
+            auto& nested_column =
+                    reinterpret_cast<const 
ColumnNullable*>(column)->get_nested_column();
+            array_column = reinterpret_cast<const 
ColumnArray*>(&nested_column);
+        } else {
+            array_column = reinterpret_cast<const ColumnArray*>(column);
+        }
+        auto item_column = array_column->get_data_ptr().get();
+        auto offset = array_column->get_offsets()[row - 1];
+        for (int i = 0; i < collection_value->length(); ++i) {
+            char* item_dst = 
reinterpret_cast<char*>(collection_value->mutable_data()) +
+                             i * item_type_desc.get_slot_size();
+            if (array[i].is_null()) {
+                const auto& null_value = doris_udf::AnyVal(true);
+                collection_value->set(i, item_type_desc.type, &null_value);
             } else {
-                auto str_ptr = pool->allocate(data_ref.size);
-                memcpy(str_ptr, data_ref.data, data_ref.size);
-                dst->get_string_slot(slot_desc->tuple_offset())->ptr =
-                        reinterpret_cast<char*>(str_ptr);
+                auto item_offset = offset + i;
+                const auto& data_ref = item_type_desc.type != TYPE_ARRAY
+                                               ? 
item_column->get_data_at(item_offset)
+                                               : StringRef();
+                deep_copy_slot(item_dst, pool, item_type_desc, data_ref, 
item_column, item_offset,
+                               padding_char);
             }
-        } else if (slot_desc->type() == TYPE_OBJECT) {
-            auto bitmap_value = (BitmapValue*)(data_ref.data);
-            auto size = bitmap_value->getSizeInBytes();
-
+        }
+    } else if (type_desc.is_date_type()) {
+        VecDateTimeValue ts =
+                *reinterpret_cast<const 
doris::vectorized::VecDateTimeValue*>(data_ref.data);
+        DateTimeValue dt;
+        ts.convert_vec_dt_to_dt(&dt);
+        memcpy(dst, &dt, sizeof(DateTimeValue));
+    } else if (type_desc.type == TYPE_OBJECT) {
+        auto bitmap_value = (BitmapValue*)(data_ref.data);
+        auto size = bitmap_value->getSizeInBytes();
+
+        // Serialize the bitmap into the pool-allocated string slot.
+        auto string_slot = reinterpret_cast<StringValue*>(dst);
+        string_slot->ptr = reinterpret_cast<char*>(pool->allocate(size));
+        bitmap_value->write(string_slot->ptr);
+        string_slot->len = size;
+    } else if (type_desc.type == TYPE_HLL) {
+        auto hll_value = (HyperLogLog*)(data_ref.data);
+        auto size = hll_value->max_serialized_size();
+        auto string_slot = reinterpret_cast<StringValue*>(dst);
+        string_slot->ptr = reinterpret_cast<char*>(pool->allocate(size));
+        size_t actual_size = hll_value->serialize((uint8_t*)string_slot->ptr);
+        string_slot->len = actual_size;
+    } else if (type_desc.is_string_type()) { // TYPE_OBJECT and TYPE_HLL must 
be handled before.
+        memcpy(dst, (const void*)(&data_ref), sizeof(data_ref));
+        // Copy the content of string
+        if (padding_char && type_desc.type == TYPE_CHAR) {
             // serialize the content of string
-            auto string_slot = dst->get_string_slot(slot_desc->tuple_offset());
-            string_slot->ptr = reinterpret_cast<char*>(pool->allocate(size));
-            bitmap_value->write(string_slot->ptr);
-            string_slot->len = size;
-        } else if (slot_desc->type() == TYPE_HLL) {
-            auto hll_value = (HyperLogLog*)(data_ref.data);
-            auto size = hll_value->max_serialized_size();
-            auto string_slot = dst->get_string_slot(slot_desc->tuple_offset());
-            string_slot->ptr = reinterpret_cast<char*>(pool->allocate(size));
-            size_t actual_size = 
hll_value->serialize((uint8_t*)string_slot->ptr);
-            string_slot->len = actual_size;
+            auto string_slot = reinterpret_cast<StringValue*>(dst);
+            string_slot->ptr = 
reinterpret_cast<char*>(pool->allocate(type_desc.len));
+            string_slot->len = type_desc.len;
+            memset(string_slot->ptr, 0, type_desc.len);
+            memcpy(string_slot->ptr, data_ref.data, data_ref.size);
         } else {
-            VecDateTimeValue ts =
-                    *reinterpret_cast<const 
doris::vectorized::VecDateTimeValue*>(data_ref.data);
-            DateTimeValue dt;
-            ts.convert_vec_dt_to_dt(&dt);
-            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), &dt, 
sizeof(DateTimeValue));
+            auto str_ptr = pool->allocate(data_ref.size);
+            memcpy(str_ptr, data_ref.data, data_ref.size);
+            auto string_slot = reinterpret_cast<StringValue*>(dst);
+            string_slot->ptr = reinterpret_cast<char*>(str_ptr);
+            string_slot->len = data_ref.size;
         }
+    } else {
+        memcpy(dst, data_ref.data, data_ref.size);
     }
-    return dst;
 }
 
-MutableBlock::MutableBlock(const std::vector<TupleDescriptor *>& tuple_descs) {
+MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs) {
     for (auto tuple_desc : tuple_descs) {
         for (auto slot_desc : tuple_desc->slots()) {
             _data_types.emplace_back(slot_desc->get_data_type_ptr());
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index c660bdb..12145f6 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -20,12 +20,13 @@
 
 #pragma once
 
+#include <parallel_hashmap/phmap.h>
+
 #include <initializer_list>
 #include <list>
 #include <set>
 #include <utility>
 #include <vector>
-#include <parallel_hashmap/phmap.h>
 
 #include "gen_cpp/data.pb.h"
 #include "vec/columns/column_nullable.h"
@@ -36,12 +37,14 @@
 #include "vec/data_types/data_type_nullable.h"
 
 namespace doris {
-class Status;
+
+class MemPool;
 class RowBatch;
 class RowDescriptor;
+class Status;
 class Tuple;
 class TupleDescriptor;
-class MemPool;
+struct TypeDescriptor;
 
 namespace vectorized {
 
@@ -97,17 +100,23 @@ public:
     ColumnWithTypeAndName& get_by_position(size_t position) { return 
data[position]; }
     const ColumnWithTypeAndName& get_by_position(size_t position) const { 
return data[position]; }
 
-    Status copy_column_data_to_block(bool is_block_mem_reuse, 
doris::vectorized::IColumn* input_col_ptr, 
-        uint16_t* sel_rowid_idx, uint16_t select_size, int block_cid, size_t 
batch_size) {
+    Status copy_column_data_to_block(bool is_block_mem_reuse,
+                                     doris::vectorized::IColumn* input_col_ptr,
+                                     uint16_t* sel_rowid_idx, uint16_t 
select_size, int block_cid,
+                                     size_t batch_size) {
         if (is_block_mem_reuse) {
             auto* raw_res_ptr = this->get_by_position(block_cid).column.get();
             
const_cast<doris::vectorized::IColumn*>(raw_res_ptr)->reserve(batch_size);
-            return input_col_ptr->filter_by_selector(sel_rowid_idx, 
select_size, const_cast<doris::vectorized::IColumn*>(raw_res_ptr));
+            return input_col_ptr->filter_by_selector(
+                    sel_rowid_idx, select_size,
+                    const_cast<doris::vectorized::IColumn*>(raw_res_ptr));
         } else {
             MutableColumnPtr res_col_ptr = 
data[block_cid].type->create_column();
             res_col_ptr->reserve(batch_size);
             auto* raw_res_ptr = res_col_ptr.get();
-            RETURN_IF_ERROR(input_col_ptr->filter_by_selector(sel_rowid_idx, 
select_size, const_cast<doris::vectorized::IColumn*>(raw_res_ptr)));
+            RETURN_IF_ERROR(input_col_ptr->filter_by_selector(
+                    sel_rowid_idx, select_size,
+                    const_cast<doris::vectorized::IColumn*>(raw_res_ptr)));
             this->replace_by_position(block_cid, std::move(res_col_ptr));
             return Status::OK();
         }
@@ -148,9 +157,9 @@ public:
     Names get_names() const;
     DataTypes get_data_types() const;
 
-    DataTypePtr get_data_type(size_t index) const { 
+    DataTypePtr get_data_type(size_t index) const {
         CHECK(index < data.size());
-        return data[index].type; 
+        return data[index].type;
     }
 
     /// Returns number of rows from first column in block, not equal to 
nullptr. If no columns, returns 0.
@@ -160,7 +169,7 @@ public:
     void set_num_rows(size_t length);
 
     // Skip the rows in block, use in OFFSET, LIMIT operation
-    void skip_num_rows(int64_t & offset);
+    void skip_num_rows(int64_t& offset);
 
     size_t columns() const { return data.size(); }
 
@@ -233,7 +242,8 @@ public:
     }
 
     // serialize block to PBlock
-    Status serialize(PBlock* pblock, size_t* uncompressed_bytes, size_t* 
compressed_bytes, std::string* allocated_buf) const;
+    Status serialize(PBlock* pblock, size_t* uncompressed_bytes, size_t* 
compressed_bytes,
+                     std::string* allocated_buf) const;
 
     // serialize block to PRowbatch
     void serialize(RowBatch*, const RowDescriptor&);
@@ -276,17 +286,25 @@ public:
 
     //note(wb) no DCHECK here, because this method is only used after 
compare_at now, so no need to repeat check here.
     // If this method is used in more places, you can add DCHECK case by case.
-    int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& 
rhs, int nan_direction_hint) const {
-        auto res = get_by_position(col_idx).column->compare_at(n, m, 
*(rhs.get_by_position(col_idx).column),
-                                                             
nan_direction_hint);
+    int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs,
+                          int nan_direction_hint) const {
+        auto res = get_by_position(col_idx).column->compare_at(
+                n, m, *(rhs.get_by_position(col_idx).column), 
nan_direction_hint);
         return res;
     }
 
-    doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int, 
bool padding_char = false);
+    doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int,
+                                  bool padding_char = false);
 
 private:
     void erase_impl(size_t position);
     void initialize_index_by_name();
+    inline bool is_column_data_null(const doris::TypeDescriptor& type_desc,
+                                    const StringRef& data_ref,
+                                    const IColumn* column_with_type_and_name, 
int row);
+    void deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor& 
type_desc,
+                        const StringRef& data_ref, const IColumn* column, int 
row,
+                        bool padding_char);
 };
 
 using Blocks = std::vector<Block>;
diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp
index 44e9e78..5c6e39f 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -32,7 +32,7 @@ DataTypePtr DataTypeFactory::create_data_type(const doris::Field& col_desc) {
     }
 
     if (col_desc.is_nullable() && nested) {
-        return std::make_shared<DataTypeNullable>(std::move(nested));
+        return std::make_shared<DataTypeNullable>(nested);
     }
     return nested;
 }
@@ -47,7 +47,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TabletColumn& col_desc) {
     }
 
     if (col_desc.is_nullable() && nested) {
-        return std::make_shared<DataTypeNullable>(std::move(nested));
+        return std::make_shared<DataTypeNullable>(nested);
     }
     return nested;
 }
@@ -104,7 +104,8 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo
         break;
     case TYPE_ARRAY:
         DCHECK(col_desc.children.size() == 1);
-        nested = 
std::make_shared<vectorized::DataTypeArray>(create_data_type(col_desc.children[0],
 false));
+        nested =
+                
std::make_shared<vectorized::DataTypeArray>(create_data_type(col_desc.children[0]));
         break;
     case INVALID_TYPE:
     default:
@@ -113,7 +114,7 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo
     }
 
     if (nested && is_nullable) {
-        return 
std::make_shared<vectorized::DataTypeNullable>(std::move(nested));
+        return std::make_shared<vectorized::DataTypeNullable>(nested);
     }
     return nested;
 }
@@ -121,54 +122,54 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeDescriptor& col_desc, bo
 DataTypePtr DataTypeFactory::_create_primitive_data_type(const FieldType& 
type) const {
     DataTypePtr result = nullptr;
     switch (type) {
-        case OLAP_FIELD_TYPE_BOOL:
-            result = std::make_shared<vectorized::DataTypeUInt8>();
-            break;
-        case OLAP_FIELD_TYPE_TINYINT:
-            result = std::make_shared<vectorized::DataTypeInt8>();
-            break;
-        case OLAP_FIELD_TYPE_SMALLINT:
-            result = std::make_shared<vectorized::DataTypeInt16>();
-            break;
-        case OLAP_FIELD_TYPE_INT:
-            result = std::make_shared<vectorized::DataTypeInt32>();
-            break;
-        case OLAP_FIELD_TYPE_FLOAT:
-            result = std::make_shared<vectorized::DataTypeFloat32>();
-            break;
-        case OLAP_FIELD_TYPE_BIGINT:
-            result = std::make_shared<vectorized::DataTypeInt64>();
-            break;
-        case OLAP_FIELD_TYPE_LARGEINT:
-            result = std::make_shared<vectorized::DataTypeInt128>();
-            break;
-        case OLAP_FIELD_TYPE_DATE:
-            result = std::make_shared<vectorized::DataTypeDate>();
-            break;
-        case OLAP_FIELD_TYPE_DATETIME:
-            result = std::make_shared<vectorized::DataTypeDateTime>();
-            break;
-        case OLAP_FIELD_TYPE_DOUBLE:
-            result = std::make_shared<vectorized::DataTypeFloat64>();
-            break;
-        case OLAP_FIELD_TYPE_CHAR:
-        case OLAP_FIELD_TYPE_VARCHAR:
-        case OLAP_FIELD_TYPE_HLL:
-        case OLAP_FIELD_TYPE_STRING:
-            result = std::make_shared<vectorized::DataTypeString>();
-            break;
-        case OLAP_FIELD_TYPE_OBJECT:
-            result = std::make_shared<vectorized::DataTypeBitMap>();
-            break;
-        case OLAP_FIELD_TYPE_DECIMAL:
-            result = 
std::make_shared<vectorized::DataTypeDecimal<vectorized::Decimal128>>(27, 9);
-            break;
-        default:
-            DCHECK(false) << "Invalid FieldType:" << (int)type;
-            result = nullptr;
-            break;
+    case OLAP_FIELD_TYPE_BOOL:
+        result = std::make_shared<vectorized::DataTypeUInt8>();
+        break;
+    case OLAP_FIELD_TYPE_TINYINT:
+        result = std::make_shared<vectorized::DataTypeInt8>();
+        break;
+    case OLAP_FIELD_TYPE_SMALLINT:
+        result = std::make_shared<vectorized::DataTypeInt16>();
+        break;
+    case OLAP_FIELD_TYPE_INT:
+        result = std::make_shared<vectorized::DataTypeInt32>();
+        break;
+    case OLAP_FIELD_TYPE_FLOAT:
+        result = std::make_shared<vectorized::DataTypeFloat32>();
+        break;
+    case OLAP_FIELD_TYPE_BIGINT:
+        result = std::make_shared<vectorized::DataTypeInt64>();
+        break;
+    case OLAP_FIELD_TYPE_LARGEINT:
+        result = std::make_shared<vectorized::DataTypeInt128>();
+        break;
+    case OLAP_FIELD_TYPE_DATE:
+        result = std::make_shared<vectorized::DataTypeDate>();
+        break;
+    case OLAP_FIELD_TYPE_DATETIME:
+        result = std::make_shared<vectorized::DataTypeDateTime>();
+        break;
+    case OLAP_FIELD_TYPE_DOUBLE:
+        result = std::make_shared<vectorized::DataTypeFloat64>();
+        break;
+    case OLAP_FIELD_TYPE_CHAR:
+    case OLAP_FIELD_TYPE_VARCHAR:
+    case OLAP_FIELD_TYPE_HLL:
+    case OLAP_FIELD_TYPE_STRING:
+        result = std::make_shared<vectorized::DataTypeString>();
+        break;
+    case OLAP_FIELD_TYPE_OBJECT:
+        result = std::make_shared<vectorized::DataTypeBitMap>();
+        break;
+    case OLAP_FIELD_TYPE_DECIMAL:
+        result = 
std::make_shared<vectorized::DataTypeDecimal<vectorized::Decimal128>>(27, 9);
+        break;
+    default:
+        DCHECK(false) << "Invalid FieldType:" << (int)type;
+        result = nullptr;
+        break;
     }
-    return result;    
+    return result;
 }
 
 DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
@@ -221,22 +222,22 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
         break;
     case PGenericType::DECIMAL32:
         nested = 
std::make_shared<DataTypeDecimal<Decimal32>>(pcolumn.decimal_param().precision(),
-                                                            
pcolumn.decimal_param().scale());
+                                                              
pcolumn.decimal_param().scale());
         break;
     case PGenericType::DECIMAL64:
         nested = 
std::make_shared<DataTypeDecimal<Decimal64>>(pcolumn.decimal_param().precision(),
-                                                            
pcolumn.decimal_param().scale());
+                                                              
pcolumn.decimal_param().scale());
         break;
     case PGenericType::DECIMAL128:
         nested = 
std::make_shared<DataTypeDecimal<Decimal128>>(pcolumn.decimal_param().precision(),
-                                                             
pcolumn.decimal_param().scale());
+                                                               
pcolumn.decimal_param().scale());
         break;
     case PGenericType::BITMAP:
         nested = std::make_shared<DataTypeBitMap>();
         break;
     case PGenericType::LIST:
         DCHECK(pcolumn.children_size() == 1);
-        nested = 
std::make_shared<DataTypeArray>(std::move(create_data_type(pcolumn.children(0))));
+        nested = 
std::make_shared<DataTypeArray>(create_data_type(pcolumn.children(0)));
         break;
     default: {
         LOG(FATAL) << fmt::format("Unknown data type: {}", pcolumn.type());
@@ -245,10 +246,9 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) {
     }
 
     if (nested && pcolumn.is_nullable() > 0) {
-        return 
std::make_shared<vectorized::DataTypeNullable>(std::move(nested));
+        return std::make_shared<vectorized::DataTypeNullable>(nested);
     }
     return nested;
 }
 
-
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp
index 7834b12..c78a4ef 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -26,7 +26,6 @@
 #include "olap/field.h"
 #include "olap/tablet_schema.h"
 #include "runtime/types.h"
-
 #include "vec/data_types/data_type.h"
 #include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_bitmap.h"
@@ -34,45 +33,45 @@
 #include "vec/data_types/data_type_date_time.h"
 #include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_nothing.h"
-#include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
 
 namespace doris::vectorized {
 
 class DataTypeFactory {
-using DataTypeMap = std::unordered_map<std::string, DataTypePtr>;
-using InvertedDataTypeMap = std::vector<std::pair<DataTypePtr, std::string>>;
+    using DataTypeMap = std::unordered_map<std::string, DataTypePtr>;
+    using InvertedDataTypeMap = std::vector<std::pair<DataTypePtr, 
std::string>>;
 
 public:
     static DataTypeFactory& instance() {
         static std::once_flag oc;
         static DataTypeFactory instance;
-        std::call_once(oc, [&]() {
-            instance.regist_data_type("UInt8", 
DataTypePtr(std::make_shared<DataTypeUInt8>()));
-            instance.regist_data_type("UInt16", 
DataTypePtr(std::make_shared<DataTypeUInt16>()));
-            instance.regist_data_type("UInt32", 
DataTypePtr(std::make_shared<DataTypeUInt32>()));
-            instance.regist_data_type("UInt64", 
DataTypePtr(std::make_shared<DataTypeUInt64>()));
-            instance.regist_data_type("Int8", 
DataTypePtr(std::make_shared<DataTypeInt8>()));
-            instance.regist_data_type("Int16", 
DataTypePtr(std::make_shared<DataTypeInt16>()));
-            instance.regist_data_type("Int32", 
DataTypePtr(std::make_shared<DataTypeInt32>()));
-            instance.regist_data_type("Int64", 
DataTypePtr(std::make_shared<DataTypeInt64>()));
-            instance.regist_data_type("Int128", 
DataTypePtr(std::make_shared<DataTypeInt128>()));
-            instance.regist_data_type("Float32", 
DataTypePtr(std::make_shared<DataTypeFloat32>()));
-            instance.regist_data_type("Float64", 
DataTypePtr(std::make_shared<DataTypeFloat64>()));
-            instance.regist_data_type("Date", 
DataTypePtr(std::make_shared<DataTypeDate>()));
-            instance.regist_data_type("DateTime",
-                                      
DataTypePtr(std::make_shared<DataTypeDateTime>()));
-            instance.regist_data_type("String", 
DataTypePtr(std::make_shared<DataTypeString>()));
-            instance.regist_data_type("Decimal",
-                    
DataTypePtr(std::make_shared<DataTypeDecimal<Decimal128>>(27, 9)));
+        std::call_once(oc, []() {
+            instance.register_data_type("UInt8", 
std::make_shared<DataTypeUInt8>());
+            instance.register_data_type("UInt16", 
std::make_shared<DataTypeUInt16>());
+            instance.register_data_type("UInt32", 
std::make_shared<DataTypeUInt32>());
+            instance.register_data_type("UInt64", 
std::make_shared<DataTypeUInt64>());
+            instance.register_data_type("Int8", 
std::make_shared<DataTypeInt8>());
+            instance.register_data_type("Int16", 
std::make_shared<DataTypeInt16>());
+            instance.register_data_type("Int32", 
std::make_shared<DataTypeInt32>());
+            instance.register_data_type("Int64", 
std::make_shared<DataTypeInt64>());
+            instance.register_data_type("Int128", 
std::make_shared<DataTypeInt128>());
+            instance.register_data_type("Float32", 
std::make_shared<DataTypeFloat32>());
+            instance.register_data_type("Float64", 
std::make_shared<DataTypeFloat64>());
+            instance.register_data_type("Date", 
std::make_shared<DataTypeDate>());
+            instance.register_data_type("DateTime", 
std::make_shared<DataTypeDateTime>());
+            instance.register_data_type("String", 
std::make_shared<DataTypeString>());
+            instance.register_data_type("Decimal",
+                                      
std::make_shared<DataTypeDecimal<Decimal128>>(27, 9));
         });
         return instance;
     }
     DataTypePtr get(const std::string& name) { return _data_type_map[name]; }
     const std::string& get(const DataTypePtr& data_type) const {
-        auto type_ptr = data_type->is_nullable() ?
-                        
((DataTypeNullable*)(data_type.get()))->get_nested_type() : data_type;
+        auto type_ptr = data_type->is_nullable()
+                                ? 
((DataTypeNullable*)(data_type.get()))->get_nested_type()
+                                : data_type;
         for (const auto& entity : _invert_data_type_map) {
             if (entity.first->equals(*type_ptr)) {
                 return entity.second;
@@ -91,7 +90,7 @@ public:
 private:
     DataTypePtr _create_primitive_data_type(const FieldType& type) const;
 
-    void regist_data_type(const std::string& name, const DataTypePtr& 
data_type) {
+    void register_data_type(const std::string& name, const DataTypePtr& 
data_type) {
         _data_type_map.emplace(name, data_type);
         _invert_data_type_map.emplace_back(data_type, name);
     }
diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/varray_literal.cpp
similarity index 52%
copy from be/src/vec/exprs/vliteral.h
copy to be/src/vec/exprs/varray_literal.cpp
index d1d3dcc..d289c17 100644
--- a/be/src/vec/exprs/vliteral.h
+++ b/be/src/vec/exprs/varray_literal.cpp
@@ -15,31 +15,24 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#pragma once
+#include "vec/exprs/varray_literal.h"
 
-#include "vec/columns/column.h"
-#include "vec/columns/column_const.h"
-#include "vec/exprs/vexpr.h"
+namespace doris::vectorized {
 
-namespace doris {
-class TExprNode;
+Status VArrayLiteral::prepare(RuntimeState* state, const RowDescriptor& 
row_desc,
+                              VExprContext* context) {
+    DCHECK_EQ(type().children.size(), 1) << "array children type not 1";
 
-namespace vectorized {
-class VLiteral : public VExpr {
-public:
-    virtual ~VLiteral();
-    VLiteral(const TExprNode& node);
-    virtual Status execute(VExprContext* context, vectorized::Block* block,
-                           int* result_column_id) override;
-    virtual const std::string& expr_name() const override { return _expr_name; 
}
-    virtual VExpr* clone(doris::ObjectPool* pool) const override {
-        return pool->add(new VLiteral(*this));
+    RETURN_IF_ERROR(VExpr::prepare(state, row_desc, context));
+    bool is_null = (_node_type == TExprNodeType::NULL_LITERAL);
+    Field array = is_null ? Field() : Array();
+    for (const auto child : _children) {
+        Field item;
+        child->get_const_col(context)->column_ptr->get(0, item);
+        array.get<Array>().push_back(item);
     }
+    _column_ptr = _data_type->create_column_const(1, array);
+    return Status::OK();
+}
 
-private:
-    ColumnPtr _column_ptr;
-    std::string _expr_name;
-};
-} // namespace vectorized
-
-} // namespace doris
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/varray_literal.h
similarity index 60%
copy from be/src/vec/exprs/vliteral.h
copy to be/src/vec/exprs/varray_literal.h
index d1d3dcc..56b5f8e 100644
--- a/be/src/vec/exprs/vliteral.h
+++ b/be/src/vec/exprs/varray_literal.h
@@ -17,28 +17,17 @@
 
 #pragma once
 
-#include "vec/columns/column.h"
-#include "vec/columns/column_const.h"
-#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vliteral.h"
 
 namespace doris {
-class TExprNode;
 
 namespace vectorized {
-class VLiteral : public VExpr {
+class VArrayLiteral : public VLiteral {
 public:
-    virtual ~VLiteral();
-    VLiteral(const TExprNode& node);
-    virtual Status execute(VExprContext* context, vectorized::Block* block,
-                           int* result_column_id) override;
-    virtual const std::string& expr_name() const override { return _expr_name; 
}
-    virtual VExpr* clone(doris::ObjectPool* pool) const override {
-        return pool->add(new VLiteral(*this));
-    }
-
-private:
-    ColumnPtr _column_ptr;
-    std::string _expr_name;
+    VArrayLiteral(const TExprNode& node) : VLiteral(node, false) {}
+    virtual ~VArrayLiteral() = default;
+    virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                           VExprContext* context) override;
 };
 } // namespace vectorized
 
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 6757b74..1c71c5b 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -24,15 +24,16 @@
 #include "exprs/anyval_util.h"
 #include "gen_cpp/Exprs_types.h"
 #include "vec/data_types/data_type_factory.hpp"
+#include "vec/exprs/varray_literal.h"
 #include "vec/exprs/vcase_expr.h"
 #include "vec/exprs/vcast_expr.h"
 #include "vec/exprs/vcompound_pred.h"
 #include "vec/exprs/vectorized_fn_call.h"
 #include "vec/exprs/vin_predicate.h"
-#include "vec/exprs/vtuple_is_null_predicate.h"
+#include "vec/exprs/vinfo_func.h"
 #include "vec/exprs/vliteral.h"
 #include "vec/exprs/vslot_ref.h"
-#include "vec/exprs/vinfo_func.h"
+#include "vec/exprs/vtuple_is_null_predicate.h"
 
 namespace doris::vectorized {
 using doris::Status;
@@ -100,6 +101,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr
         *expr = pool->add(new VLiteral(texpr_node));
         return Status::OK();
     }
+    case TExprNodeType::ARRAY_LITERAL: {
+        *expr = pool->add(new VArrayLiteral(texpr_node));
+        return Status::OK();
+    }
     case doris::TExprNodeType::SLOT_REF: {
         *expr = pool->add(new VSlotRef(texpr_node));
         break;
diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp
index 38ec4e1..c856c76 100644
--- a/be/src/vec/exprs/vliteral.cpp
+++ b/be/src/vec/exprs/vliteral.cpp
@@ -24,107 +24,104 @@
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/runtime/vdatetime_value.h"
 namespace doris::vectorized {
-VLiteral::VLiteral(const TExprNode& node) : VExpr(node) {
+
+void VLiteral::init(const TExprNode& node) {
     Field field;
     if (node.node_type != TExprNodeType::NULL_LITERAL) {
         switch (_type.type) {
-            case TYPE_BOOLEAN: {
-                DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL);
-                DCHECK(node.__isset.bool_literal);
-                field = Int8(node.bool_literal.value);
-                break;
-            }
-            case TYPE_TINYINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int8(node.int_literal.value);
-                break;
-            }
-            case TYPE_SMALLINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int16(node.int_literal.value);
-                break;
-            }
-            case TYPE_INT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int32(node.int_literal.value);
-                break;
-            }
-            case TYPE_BIGINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int64(node.int_literal.value);
-                break;
-            }
-            case TYPE_LARGEINT: {
-                StringParser::ParseResult parse_result = 
StringParser::PARSE_SUCCESS;
-                DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL);
-                __int128_t value = StringParser::string_to_int<__int128>(
-                        node.large_int_literal.value.c_str(), 
node.large_int_literal.value.size(),
-                        &parse_result);
-                if (parse_result != StringParser::PARSE_SUCCESS) {
-                    value = MAX_INT128;
-                }
-                field = Int128(value);
-                break;
-            }
-            case TYPE_FLOAT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
-                DCHECK(node.__isset.float_literal);
-                field = Float32(node.float_literal.value);
-                break;
-            }
-            case TYPE_TIME:
-            case TYPE_DOUBLE: {
-                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
-                DCHECK(node.__isset.float_literal);
-                field = Float64(node.float_literal.value);
-                break;
-            }
-            case TYPE_DATE: {
-                VecDateTimeValue value;
-                value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
-                value.cast_to_date();
-                field = Int64(*reinterpret_cast<__int64_t*>(&value));
-                break;
-            }
-            case TYPE_DATETIME: {
-                VecDateTimeValue value;
-                value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
-                value.to_datetime();
-                field = Int64(*reinterpret_cast<__int64_t*>(&value));
-                break;
-            }
-            case TYPE_STRING:
-            case TYPE_CHAR:
-            case TYPE_VARCHAR: {
-                DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
-                DCHECK(node.__isset.string_literal);
-                field = node.string_literal.value;
-                break;
-            }
-            case TYPE_DECIMALV2: {
-                DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL);
-                DCHECK(node.__isset.decimal_literal);
-                DecimalV2Value value(node.decimal_literal.value);
-                field = DecimalField<Decimal128>(value.value(), value.scale());
-                break;
-            }
-            default: {
-                DCHECK(false) << "Invalid type: " << _type.type;
-                break;
+        case TYPE_BOOLEAN: {
+            DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL);
+            DCHECK(node.__isset.bool_literal);
+            field = Int8(node.bool_literal.value);
+            break;
+        }
+        case TYPE_TINYINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int8(node.int_literal.value);
+            break;
+        }
+        case TYPE_SMALLINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int16(node.int_literal.value);
+            break;
+        }
+        case TYPE_INT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int32(node.int_literal.value);
+            break;
+        }
+        case TYPE_BIGINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int64(node.int_literal.value);
+            break;
+        }
+        case TYPE_LARGEINT: {
+            StringParser::ParseResult parse_result = 
StringParser::PARSE_SUCCESS;
+            DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL);
+            __int128_t value = StringParser::string_to_int<__int128>(
+                    node.large_int_literal.value.c_str(), 
node.large_int_literal.value.size(),
+                    &parse_result);
+            if (parse_result != StringParser::PARSE_SUCCESS) {
+                value = MAX_INT128;
             }
+            field = Int128(value);
+            break;
+        }
+        case TYPE_FLOAT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+            DCHECK(node.__isset.float_literal);
+            field = Float32(node.float_literal.value);
+            break;
+        }
+        case TYPE_TIME:
+        case TYPE_DOUBLE: {
+            DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+            DCHECK(node.__isset.float_literal);
+            field = Float64(node.float_literal.value);
+            break;
+        }
+        case TYPE_DATE: {
+            VecDateTimeValue value;
+            value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
+            value.cast_to_date();
+            field = Int64(*reinterpret_cast<__int64_t*>(&value));
+            break;
+        }
+        case TYPE_DATETIME: {
+            VecDateTimeValue value;
+            value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
+            value.to_datetime();
+            field = Int64(*reinterpret_cast<__int64_t*>(&value));
+            break;
+        }
+        case TYPE_STRING:
+        case TYPE_CHAR:
+        case TYPE_VARCHAR: {
+            DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
+            DCHECK(node.__isset.string_literal);
+            field = node.string_literal.value;
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL);
+            DCHECK(node.__isset.decimal_literal);
+            DecimalV2Value value(node.decimal_literal.value);
+            field = DecimalField<Decimal128>(value.value(), value.scale());
+            break;
+        }
+        default: {
+            DCHECK(false) << "Invalid type: " << _type.type;
+            break;
+        }
         }
     }
-
-    this->_column_ptr = _data_type->create_column_const(1, field);
-    _expr_name = _data_type->get_name();
+    _column_ptr = _data_type->create_column_const(1, field);
 }
 
-VLiteral::~VLiteral() {}
-
 Status VLiteral::execute(VExprContext* context, vectorized::Block* block, int* 
result_column_id) {
     int rows = block->rows();
     if (rows < 1) {
diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h
index d1d3dcc..b694ab5 100644
--- a/be/src/vec/exprs/vliteral.h
+++ b/be/src/vec/exprs/vliteral.h
@@ -27,8 +27,13 @@ class TExprNode;
 namespace vectorized {
 class VLiteral : public VExpr {
 public:
-    virtual ~VLiteral();
-    VLiteral(const TExprNode& node);
+    VLiteral(const TExprNode& node, bool should_init = true)
+            : VExpr(node), _expr_name(_data_type->get_name()) {
+        if (should_init) {
+            init(node);
+        }
+    };
+    virtual ~VLiteral() = default;
     virtual Status execute(VExprContext* context, vectorized::Block* block,
                            int* result_column_id) override;
     virtual const std::string& expr_name() const override { return _expr_name; 
}
@@ -36,9 +41,12 @@ public:
         return pool->add(new VLiteral(*this));
     }
 
-private:
+protected:
     ColumnPtr _column_ptr;
     std::string _expr_name;
+
+private:
+    void init(const TExprNode& node);
 };
 } // namespace vectorized
 
diff --git a/be/src/vec/sink/mysql_result_writer.cpp b/be/src/vec/sink/mysql_result_writer.cpp
index e4fc56b..60fa9a4 100644
--- a/be/src/vec/sink/mysql_result_writer.cpp
+++ b/be/src/vec/sink/mysql_result_writer.cpp
@@ -107,8 +107,8 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
             result->result_batch.rows[i].append(_buffer.buf(), 
_buffer.length());
         }
     } else if constexpr (type == TYPE_ARRAY) {
-        auto& array_column = assert_cast<const ColumnArray&>(*column);
-        auto& offsets = array_column.get_offsets();
+        auto& column_array = assert_cast<const ColumnArray&>(*column);
+        auto& offsets = column_array.get_offsets();
         for (int i = 0; i < column_size; ++i) {
             if (0 != buf_ret) {
                 return Status::InternalError("pack mysql buffer failed.");
@@ -130,7 +130,12 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
                 if (!begin) {
                     buf_ret = _buffer.push_string(", ", 2);
                 }
-                buf_ret = _add_one_cell(array_column.get_data_ptr(), j, 
nested_type_ptr, _buffer);
+                const auto& data = column_array.get_data_ptr();
+                if (data->is_null_at(j)) {
+                    buf_ret = _buffer.push_string("NULL", strlen("NULL"));
+                } else {
+                    buf_ret = _add_one_cell(data, j, nested_type_ptr, _buffer);
+                }
                 begin = false;
             }
             buf_ret = _buffer.push_string("]", 1);
@@ -211,7 +216,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
 }
 
 int VMysqlResultWriter::_add_one_cell(const ColumnPtr& column_ptr, size_t 
row_idx,
-                                         const DataTypePtr& type, 
MysqlRowBuffer& buffer) {
+                                      const DataTypePtr& type, MysqlRowBuffer& 
buffer) {
     WhichDataType which(type->get_type_id());
     if (which.is_nullable() && column_ptr->is_null_at(row_idx)) {
         return buffer.push_null();
@@ -265,6 +270,38 @@ int VMysqlResultWriter::_add_one_cell(const ColumnPtr& column_ptr, size_t row_id
             buf_ret = buffer.push_string(string_val.data, string_val.size);
         }
         return buf_ret;
+    } else if (which.is_array()) {
+        auto& column_array = assert_cast<const ColumnArray&>(*column);
+        auto& offsets = column_array.get_offsets();
+        DataTypePtr sub_type;
+        if (type->is_nullable()) {
+            auto& nested_type = assert_cast<const 
DataTypeNullable&>(*type).get_nested_type();
+            sub_type = assert_cast<const 
DataTypeArray&>(*nested_type).get_nested_type();
+        } else {
+            sub_type = assert_cast<const 
DataTypeArray&>(*type).get_nested_type();
+        }
+
+        int start = offsets[row_idx - 1];
+        int length = offsets[row_idx] - start;
+        const auto& data = column_array.get_data_ptr();
+
+        int buf_ret = buffer.push_string("[", strlen("["));
+        bool begin = true;
+        for (int i = 0; i < length; ++i) {
+            int position = start + i;
+            if (begin) {
+                begin = false;
+            } else {
+                buf_ret = buffer.push_string(", ", strlen(", "));
+            }
+            if (data->is_null_at(position)) {
+                buf_ret = buffer.push_string("NULL", strlen("NULL"));
+            } else {
+                buf_ret = _add_one_cell(data, position, sub_type, buffer);
+            }
+        }
+        buf_ret = buffer.push_string("]", strlen("]"));
+        return buf_ret;
     } else {
         LOG(WARNING) << "sub TypeIndex(" << (int)which.idx << "not supported 
yet";
         return -1;
@@ -408,12 +445,15 @@ Status VMysqlResultWriter::append_block(Block& input_block) {
         }
         case TYPE_ARRAY: {
             if (type_ptr->is_nullable()) {
-                auto& nested_type = assert_cast<const 
DataTypeNullable&>(*type_ptr).get_nested_type();
+                auto& nested_type =
+                        assert_cast<const 
DataTypeNullable&>(*type_ptr).get_nested_type();
                 auto& sub_type = assert_cast<const 
DataTypeArray&>(*nested_type).get_nested_type();
-                status = _add_one_column<PrimitiveType::TYPE_ARRAY, 
true>(column_ptr, result, sub_type);
+                status = _add_one_column<PrimitiveType::TYPE_ARRAY, 
true>(column_ptr, result,
+                                                                          
sub_type);
             } else {
                 auto& sub_type = assert_cast<const 
DataTypeArray&>(*type_ptr).get_nested_type();
-                status = _add_one_column<PrimitiveType::TYPE_ARRAY, 
false>(column_ptr, result, sub_type);
+                status = _add_one_column<PrimitiveType::TYPE_ARRAY, 
false>(column_ptr, result,
+                                                                           
sub_type);
             }
             break;
         }
diff --git a/be/test/runtime/array_test.cpp b/be/test/runtime/array_test.cpp
index 57a5cdb..40a326a 100644
--- a/be/test/runtime/array_test.cpp
+++ b/be/test/runtime/array_test.cpp
@@ -31,6 +31,7 @@
 #include "olap/field.h"
 #include "olap/fs/block_manager.h"
 #include "olap/fs/fs_util.h"
+#include "olap/row_block2.h"
 #include "olap/rowset/segment_v2/column_reader.h"
 #include "olap/rowset/segment_v2/column_writer.h"
 #include "olap/tablet_schema.h"
@@ -39,9 +40,11 @@
 #include "runtime/mem_pool.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/raw_value.h"
+#include "test_util/array_utils.h"
 #include "testutil/desc_tbl_builder.h"
 #include "util/file_utils.h"
 #include "util/uid_util.h"
+#include "vec/core/block.h"
 
 namespace doris {
 
@@ -103,86 +106,19 @@ TupleDescriptor* get_tuple_descriptor(ObjectPool& 
object_pool, const TypeInfo* t
     return builder.build()->get_tuple_descriptor(0);
 }
 
-CollectionValue* parse(ObjectPool& object_pool,
-                       const 
rapidjson::GenericValue<rapidjson::UTF8<>>::ConstArray& json_array,
-                       const TypeDescriptor& type_desc) {
-    if (json_array.Empty()) {
-        return object_pool.add(new CollectionValue(0));
-    } else {
-        auto array = object_pool.add(new CollectionValue());
-        const auto& item_type_desc = type_desc.children[0];
-        CollectionValue::init_collection(&object_pool, json_array.Size(), 
item_type_desc.type,
-                                         array);
-        int index = 0;
-        switch (item_type_desc.type) {
-        case TYPE_ARRAY:
-            for (auto it = json_array.Begin(); it != json_array.End(); ++it) {
-                auto val = CollectionVal();
-                if (it->IsNull()) {
-                    val.is_null = true;
-                } else {
-                    auto sub_array = parse(object_pool, it->GetArray(), 
item_type_desc);
-                    sub_array->to_collection_val(&val);
-                }
-                array->set(index++, item_type_desc.type, &val);
-            }
-            break;
-        case TYPE_INT:
-            for (auto it = json_array.Begin(); it != json_array.End(); ++it) {
-                auto val = it->IsNull() ? IntVal::null() : 
IntVal(it->GetInt());
-                array->set(index++, item_type_desc.type, &val);
-            }
-            break;
-        case TYPE_VARCHAR:
-            for (auto it = json_array.Begin(); it != json_array.End(); ++it) {
-                if (it->IsNull()) {
-                    auto val = StringVal::null();
-                    array->set(index++, item_type_desc.type, &val);
-                } else {
-                    char* string = object_pool.add_array(new 
char[it->GetStringLength()]);
-                    memcpy(string, it->GetString(), it->GetStringLength());
-                    auto val = StringVal(reinterpret_cast<uint8_t*>(string), 
it->GetStringLength());
-                    array->set(index++, item_type_desc.type, &val);
-                }
-            }
-            break;
-        default:
-            break;
-        }
-        if (!array->has_null()) {
-            array->set_null_signs(nullptr);
-        }
-        return array;
-    }
-}
-
-CollectionValue* parse(ObjectPool& object_pool, const std::string& text,
-                       const TypeDescriptor& type_desc) {
-    rapidjson::Document document;
-    if (document.Parse(text.c_str()).HasParseError() || !document.IsArray()) {
+CollectionValue* parse(MemPool& mem_pool, FunctionContext& context, const 
std::string& text,
+                       const ColumnPB& column_pb) {
+    auto collection_value =
+            
reinterpret_cast<CollectionValue*>(mem_pool.allocate(sizeof(CollectionValue)));
+    auto status = ArrayUtils::create_collection_value(collection_value, 
&context, text);
+    if (!status.ok()) {
         return nullptr;
     }
-    return parse(object_pool, (const_cast<const 
rapidjson::Document*>(&document))->GetArray(),
-                 type_desc);
-}
-
-void validate(const Field* field, const CollectionValue* expect, const 
CollectionValue* actual,
-              bool check_nullptr) {
-    EXPECT_TRUE(field->type_info()->equal(expect, actual));
-    if (check_nullptr) {
-        if (expect->length() == 0) {
-            EXPECT_EQ(nullptr, actual->data());
-            EXPECT_EQ(expect->data(), actual->data());
-        }
-        if (!expect->has_null()) {
-            EXPECT_EQ(nullptr, expect->null_signs());
-            EXPECT_EQ(expect->null_signs(), actual->null_signs());
-        }
-    }
+    return collection_value;
 }
 
 void validate(const Field* field, const CollectionValue* expect, const 
CollectionValue* actual) {
-    validate(field, expect, actual, true);
+    EXPECT_TRUE(field->type_info()->equal(expect, actual));
 }
 
 class ArrayTest : public ::testing::Test {
@@ -254,7 +190,7 @@ private:
         for (auto array : arrays) {
             field->type_info()->direct_copy(&cell, array);
             EXPECT_EQ(cell.null_signs(), 
reinterpret_cast<bool*>(variable_ptr.get()));
-            validate(field, array, &cell, false);
+            validate(field, array, &cell);
         }
     }
 
@@ -263,8 +199,13 @@ private:
                                     const std::vector<const CollectionValue*>& 
arrays) {
         const std::string path = TEST_DIR + "/" + generate_uuid_string();
         LOG(INFO) << "Test directory: " << path;
+
         segment_v2::ColumnMetaPB meta;
         init_column_meta<array_encoding, item_encoding>(&meta, column_pb);
+
+        TabletColumn tablet_column;
+        tablet_column.init_from_pb(column_pb);
+        Schema schema({tablet_column}, 0);
         {
             auto wblock = create_writable_block(path);
             ASSERT_NE(wblock, nullptr);
@@ -295,14 +236,8 @@ private:
             auto st = iter->seek_to_first();
             ASSERT_TRUE(st.ok()) << st.to_string();
 
-            auto tracker = std::make_shared<MemTracker>();
-            MemPool pool(tracker.get());
-            std::unique_ptr<ColumnVectorBatch> cvb;
-            ColumnVectorBatch::create(0, true, field->type_info(), 
const_cast<Field*>(field), &cvb);
-            ASSERT_NE(cvb, nullptr) << st.to_string();
-            cvb->resize(1024);
-            ColumnBlock col(cvb.get(), &pool);
-
+            RowBlockV2 block(schema, 1024);
+            auto col = block.column_block(0);
             int index = 0;
             size_t rows_read = 1024;
             do {
@@ -311,12 +246,17 @@ private:
                 ASSERT_TRUE(st.ok());
                 for (int i = 0; i < rows_read; ++i) {
                     validate(field, arrays[index++],
-                             reinterpret_cast<const 
CollectionValue*>(col.cell_ptr(i)), false);
+                             reinterpret_cast<const 
CollectionValue*>(col.cell_ptr(i)));
                 }
                 ASSERT_TRUE(st.ok());
             } while (rows_read >= 1024);
+
+            auto tuple_desc = get_tuple_descriptor(_object_pool, 
get_type_info(column_pb).get());
+            block.set_selected_size(rows_read);
+            test_convert_to_vec_block(block, tuple_desc, field, arrays);
         }
     }
+
     template <segment_v2::EncodingTypePB array_encoding, 
segment_v2::EncodingTypePB item_encoding>
     void init_column_meta(segment_v2::ColumnMetaPB* meta, const ColumnPB& 
column_pb) {
         int column_id = 0;
@@ -405,11 +345,30 @@ private:
     template <segment_v2::EncodingTypePB array_encoding, 
segment_v2::EncodingTypePB item_encoding>
     void test_array(const ColumnPB& column_pb, const Field* field,
                     const TupleDescriptor* tuple_desc, const CollectionValue* 
array) {
+        ASSERT_NE(array, nullptr);
         test_copy_array(tuple_desc, field, array);
         test_direct_copy_array(field, {array});
         test_write_and_read_column<array_encoding, item_encoding>(column_pb, 
field, {array});
     }
 
+    void test_convert_to_vec_block(RowBlockV2& row_block, const 
TupleDescriptor* tuple_desc,
+                                   const Field* field,
+                                   const std::vector<const CollectionValue*>& 
arrays) {
+        vectorized::Block block;
+        for (const auto slot_desc : tuple_desc->slots()) {
+            
block.insert(vectorized::ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(),
+                                                           
slot_desc->get_data_type_ptr(),
+                                                           
slot_desc->col_name()));
+        }
+
+        row_block.convert_to_vec_block(&block);
+        for (int i = 0; i < arrays.size(); ++i) {
+            auto tuple = block.deep_copy_tuple(*tuple_desc, _mem_pool.get(), 
i, 0, false);
+            auto actual = 
tuple->get_collection_slot(tuple_desc->slots().front()->tuple_offset());
+            validate(field, arrays[i], actual);
+        }
+    }
+
 private:
     static constexpr size_t MAX_MEMORY_BYTES = 1024 * 1024;
     static const std::string TEST_DIR;
@@ -426,16 +385,17 @@ TEST_F(ArrayTest, TestSimpleIntArrays) {
     auto field = create_field(column_pb);
     auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get());
     ASSERT_EQ(tuple_desc->slots().size(), 1);
-    auto type_desc = tuple_desc->slots().front()->type();
+    FunctionContext context;
+    ArrayUtils::prepare_context(context, *_mem_pool, column_pb);
 
     std::vector<const CollectionValue*> arrays = {
-            parse(_object_pool, "[]", type_desc),
-            parse(_object_pool, "[null]", type_desc),
-            parse(_object_pool, "[1, 2, 3]", type_desc),
-            parse(_object_pool, "[1, null, 3]", type_desc),
-            parse(_object_pool, "[1, null, null]", type_desc),
-            parse(_object_pool, "[null, null, 3]", type_desc),
-            parse(_object_pool, "[null, null, null]", type_desc),
+            parse(*_mem_pool, context, "[]", column_pb),
+            parse(*_mem_pool, context, "[null]", column_pb),
+            parse(*_mem_pool, context, "[1, 2, 3]", column_pb),
+            parse(*_mem_pool, context, "[1, null, 3]", column_pb),
+            parse(*_mem_pool, context, "[1, null, null]", column_pb),
+            parse(*_mem_pool, context, "[null, null, 3]", column_pb),
+            parse(*_mem_pool, context, "[null, null, null]", column_pb),
     };
     for (auto array : arrays) {
         test_array<segment_v2::DEFAULT_ENCODING, 
segment_v2::BIT_SHUFFLE>(column_pb, field.get(),
@@ -453,15 +413,16 @@ TEST_F(ArrayTest, TestNestedIntArrays) {
     auto field = create_field(column_pb);
     auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get());
     ASSERT_EQ(tuple_desc->slots().size(), 1);
-    auto type_desc = tuple_desc->slots().front()->type();
+    auto context = std::make_unique<FunctionContext>();
+    ArrayUtils::prepare_context(*context, *_mem_pool, column_pb);
 
     std::vector<const CollectionValue*> arrays = {
-            parse(_object_pool, "[]", type_desc),
-            parse(_object_pool, "[[]]", type_desc),
-            parse(_object_pool, "[[1, 2, 3], [4, 5, 6]]", type_desc),
-            parse(_object_pool, "[[1, 2, 3], null, [4, 5, 6]]", type_desc),
-            parse(_object_pool, "[[1, 2, null], null, [4, null, 6], null, 
[null, 8, 9]]",
-                  type_desc),
+            parse(*_mem_pool, *context, "[]", column_pb),
+            parse(*_mem_pool, *context, "[[]]", column_pb),
+            parse(*_mem_pool, *context, "[[1, 2, 3], [4, 5, 6]]", column_pb),
+            parse(*_mem_pool, *context, "[[1, 2, 3], null, [4, 5, 6]]", 
column_pb),
+            parse(*_mem_pool, *context, "[[1, 2, null], null, [4, null, 6], 
null, [null, 8, 9]]",
+                  column_pb),
     };
     for (auto array : arrays) {
         test_array<segment_v2::DEFAULT_ENCODING, 
segment_v2::BIT_SHUFFLE>(column_pb, field.get(),
@@ -477,15 +438,17 @@ TEST_F(ArrayTest, TestNestedIntArrays) {
     field = create_field(column_pb);
     tuple_desc = get_tuple_descriptor(_object_pool, type_info.get());
     ASSERT_EQ(tuple_desc->slots().size(), 1);
-    type_desc = tuple_desc->slots().front()->type();
     arrays.clear();
     ASSERT_EQ(arrays.size(), 0);
+    context.reset(new FunctionContext);
+    ArrayUtils::prepare_context(*context, *_mem_pool, column_pb);
 
     arrays = {
-            parse(_object_pool, "[]", type_desc),
-            parse(_object_pool, "[[]]", type_desc),
-            parse(_object_pool, "[[[]]]", type_desc),
-            parse(_object_pool, "[[[null]], [[1], [2, 3]], [[4, 5, 6], null, 
null]]", type_desc),
+            parse(*_mem_pool, *context, "[]", column_pb),
+            parse(*_mem_pool, *context, "[[]]", column_pb),
+            parse(*_mem_pool, *context, "[[[]]]", column_pb),
+            parse(*_mem_pool, *context, "[[[null]], [[1], [2, 3]], [[4, 5, 6], 
null, null]]",
+                  column_pb),
     };
     for (auto array : arrays) {
         test_array<segment_v2::DEFAULT_ENCODING, 
segment_v2::BIT_SHUFFLE>(column_pb, field.get(),
@@ -502,17 +465,18 @@ TEST_F(ArrayTest, TestSimpleStringArrays) {
     auto field = create_field(column_pb);
     auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get());
     ASSERT_EQ(tuple_desc->slots().size(), 1);
-    auto type_desc = tuple_desc->slots().front()->type();
+    FunctionContext context;
+    ArrayUtils::prepare_context(context, *_mem_pool, column_pb);
 
     std::vector<const CollectionValue*> arrays = {
-            parse(_object_pool, "[]", type_desc),
-            parse(_object_pool, "[null]", type_desc),
-            parse(_object_pool, "[\"a\", \"b\", \"c\"]", type_desc),
-            parse(_object_pool, "[null, \"b\", \"c\"]", type_desc),
-            parse(_object_pool, "[\"a\", null, \"c\"]", type_desc),
-            parse(_object_pool, "[\"a\", \"b\", null]", type_desc),
-            parse(_object_pool, "[null, \"b\", null]", type_desc),
-            parse(_object_pool, "[null, null, null]", type_desc),
+            parse(*_mem_pool, context, "[]", column_pb),
+            parse(*_mem_pool, context, "[null]", column_pb),
+            parse(*_mem_pool, context, "[\"a\", \"b\", \"c\"]", column_pb),
+            parse(*_mem_pool, context, "[null, \"b\", \"c\"]", column_pb),
+            parse(*_mem_pool, context, "[\"a\", null, \"c\"]", column_pb),
+            parse(*_mem_pool, context, "[\"a\", \"b\", null]", column_pb),
+            parse(*_mem_pool, context, "[null, \"b\", null]", column_pb),
+            parse(*_mem_pool, context, "[null, null, null]", column_pb),
     };
     for (auto array : arrays) {
         test_array<segment_v2::DEFAULT_ENCODING, 
segment_v2::DICT_ENCODING>(column_pb, field.get(),
@@ -529,15 +493,16 @@ TEST_F(ArrayTest, TestNestedStringArrays) {
     auto field = create_field(column_pb);
     auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get());
     ASSERT_EQ(tuple_desc->slots().size(), 1);
-    auto type_desc = tuple_desc->slots().front()->type();
+    FunctionContext context;
+    ArrayUtils::prepare_context(context, *_mem_pool, column_pb);
 
     std::vector<const CollectionValue*> arrays = {
-            parse(_object_pool, "[]", type_desc),
-            parse(_object_pool, "[[]]", type_desc),
-            parse(_object_pool, "[[[]]]", type_desc),
-            parse(_object_pool, "[null, [null], [[null]]]", type_desc),
-            parse(_object_pool, "[[[\"a\", null, \"c\"], [\"d\", \"e\", 
\"f\"]], null, [[\"g\"]]]",
-                  type_desc),
+            parse(*_mem_pool, context, "[]", column_pb),
+            parse(*_mem_pool, context, "[[]]", column_pb),
+            parse(*_mem_pool, context, "[[[]]]", column_pb),
+            parse(*_mem_pool, context, "[null, [null], [[null]]]", column_pb),
+            parse(*_mem_pool, context,
+                  "[[[\"a\", null, \"c\"], [\"d\", \"e\", \"f\"]], null, 
[[\"g\"]]]", column_pb),
     };
     for (auto array : arrays) {
         test_array<segment_v2::DEFAULT_ENCODING, 
segment_v2::DICT_ENCODING>(column_pb, field.get(),
diff --git a/be/test/test_util/CMakeLists.txt b/be/test/test_util/CMakeLists.txt
index 2d80269..223df85 100644
--- a/be/test/test_util/CMakeLists.txt
+++ b/be/test/test_util/CMakeLists.txt
@@ -20,10 +20,12 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/test/test_util")
 
 set(TEST_UTIL_FILES
   test_util.cpp
+  array_utils.cpp
 )
 
 add_library(Test_util STATIC
     ${TEST_UTIL_FILES}
 )
 
+target_compile_options(Test_util PRIVATE "-fno-access-control")
 target_link_libraries(Test_util Common Util Gutil ${Boost_LIBRARIES} glog 
gflags protobuf)
diff --git a/be/test/test_util/array_utils.cpp 
b/be/test/test_util/array_utils.cpp
new file mode 100644
index 0000000..4fb0da5
--- /dev/null
+++ b/be/test/test_util/array_utils.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "test_util/array_utils.h"
+
+#include "common/status.h"
+#include "exprs/anyval_util.h"
+#include "gen_cpp/olap_file.pb.h"
+#include "runtime/collection_value.h"
+#include "runtime/free_pool.hpp"
+#include "runtime/mem_pool.h"
+#include "runtime/mem_tracker.h"
+#include "udf/udf_internal.h"
+#include "util/array_parser.hpp"
+
+namespace doris {
+
+using TypeDesc = FunctionContext::TypeDesc;
+
+void ArrayUtils::prepare_context(FunctionContext& context, MemPool& mem_pool,
+                                 const ColumnPB& column_pb) {
+    auto function_type_desc = create_function_type_desc(column_pb);
+    context.impl()->_return_type = function_type_desc;
+    context.impl()->_pool = new FreePool(&mem_pool);
+}
+
+Status ArrayUtils::create_collection_value(CollectionValue* collection_value,
+                                           FunctionContext* context,
+                                           const std::string& json_string) {
+    CollectionVal collection_val;
+    auto status = ArrayParser::parse(collection_val, context, 
StringVal(json_string.c_str()));
+    if (!status.ok()) {
+        return status;
+    }
+    new (collection_value) CollectionValue(collection_val.data, 
collection_val.length,
+                                           collection_val.has_null, 
collection_val.null_signs);
+    return Status::OK();
+}
+
+TypeDesc ArrayUtils::create_function_type_desc(const ColumnPB& column_pb) {
+    TypeDesc type_desc;
+    type_desc.len = column_pb.length();
+    type_desc.precision = column_pb.precision();
+    type_desc.scale = column_pb.frac();
+    if (column_pb.type() == "ARRAY") {
+        type_desc.type = FunctionContext::TYPE_ARRAY;
+    } else if (column_pb.type() == "INT") {
+        type_desc.type = FunctionContext::TYPE_INT;
+    } else if (column_pb.type() == "VARCHAR") {
+        type_desc.type = FunctionContext::TYPE_VARCHAR;
+    }
+    for (const auto& sub_column_pb : column_pb.children_columns()) {
+        type_desc.children.push_back(create_function_type_desc(sub_column_pb));
+    }
+    return type_desc;
+}
+
+} // namespace doris
diff --git a/be/src/vec/exprs/vliteral.h b/be/test/test_util/array_utils.h
similarity index 57%
copy from be/src/vec/exprs/vliteral.h
copy to be/test/test_util/array_utils.h
index d1d3dcc..41503dc 100644
--- a/be/src/vec/exprs/vliteral.h
+++ b/be/test/test_util/array_utils.h
@@ -17,29 +17,27 @@
 
 #pragma once
 
-#include "vec/columns/column.h"
-#include "vec/columns/column_const.h"
-#include "vec/exprs/vexpr.h"
+#include <string>
+
+#include "udf/udf.h"
 
 namespace doris {
-class TExprNode;
 
-namespace vectorized {
-class VLiteral : public VExpr {
+class ColumnPB;
+class MemPool;
+class Status;
+struct CollectionValue;
+
+class ArrayUtils {
 public:
-    virtual ~VLiteral();
-    VLiteral(const TExprNode& node);
-    virtual Status execute(VExprContext* context, vectorized::Block* block,
-                           int* result_column_id) override;
-    virtual const std::string& expr_name() const override { return _expr_name; 
}
-    virtual VExpr* clone(doris::ObjectPool* pool) const override {
-        return pool->add(new VLiteral(*this));
-    }
+    using TypeDesc = FunctionContext::TypeDesc;
+    static void prepare_context(FunctionContext& context, MemPool& mem_pool,
+                                const ColumnPB& column_pb);
+    static Status create_collection_value(CollectionValue* collection_value,
+                                          FunctionContext* context, const 
std::string& json_string);
 
 private:
-    ColumnPtr _column_ptr;
-    std::string _expr_name;
+    static TypeDesc create_function_type_desc(const ColumnPB& column_pb);
 };
-} // namespace vectorized
 
 } // namespace doris
diff --git a/be/test/util/array_parser_test.cpp 
b/be/test/util/array_parser_test.cpp
index cbda494..da14102 100644
--- a/be/test/util/array_parser_test.cpp
+++ b/be/test/util/array_parser_test.cpp
@@ -19,111 +19,102 @@
 
 #include <memory>
 #include <string>
-#include <util/array_parser.hpp>
 
-#include "gutil/casts.h"
 #include "olap/types.h"
-#include "runtime/free_pool.hpp"
-#include "runtime/mem_pool.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/string_value.h"
-#include "udf/udf.h"
-#include "udf/udf_internal.h"
+#include "test_util/array_utils.h"
 
 namespace doris {
 
-using TypeDesc = FunctionContext::TypeDesc;
-
 template <typename... Ts>
-TypeDesc create_function_type_desc(FunctionContext::Type type, Ts... 
sub_types) {
-    TypeDesc type_desc = {.type = type,
-                          .len = (type == FunctionContext::TYPE_ARRAY) ? 
OLAP_ARRAY_MAX_BYTES : 0};
-    if constexpr (sizeof...(sub_types)) {
-        type_desc.children.push_back(create_function_type_desc(sub_types...));
-    }
-    return type_desc;
-}
-
-ColumnPB create_column_pb(const TypeDesc& function_type_desc) {
-    ColumnPB column_pb;
-    column_pb.set_length(function_type_desc.len);
-    switch (function_type_desc.type) {
-    case FunctionContext::TYPE_ARRAY:
-        column_pb.set_type("ARRAY");
-        break;
-    case FunctionContext::TYPE_INT:
-        column_pb.set_type("INT");
-        break;
-    case FunctionContext::TYPE_VARCHAR:
-        column_pb.set_type("VARCHAR");
-        break;
-    default:
-        break;
+ColumnPB create_column_pb(const std::string& type, const Ts&... 
sub_column_types) {
+    ColumnPB column;
+    column.set_type(type);
+    column.set_aggregation("NONE");
+    column.set_is_nullable(true);
+    if (type == "ARRAY") {
+        column.set_length(OLAP_ARRAY_MAX_BYTES);
     }
-    for (auto child_type_desc : function_type_desc.children) {
-        auto sub_column_pb = create_column_pb(child_type_desc);
-        column_pb.add_children_columns()->Swap(&sub_column_pb);
+    if constexpr (sizeof...(sub_column_types) > 0) {
+        auto sub_column = create_column_pb(sub_column_types...);
+        column.add_children_columns()->Swap(&sub_column);
     }
-    return column_pb;
+    return column;
 }
 
-std::shared_ptr<const TypeInfo> get_type_info(const TypeDesc& 
function_type_desc) {
-    auto column_pb = create_column_pb(function_type_desc);
+std::shared_ptr<const TypeInfo> get_type_info(const ColumnPB& column_pb) {
     TabletColumn tablet_column;
     tablet_column.init_from_pb(column_pb);
     return get_type_info(&tablet_column);
 }
 
-void test_array_parser(const TypeDesc& function_type_desc, const std::string& 
json,
+void test_array_parser(const ColumnPB& column_pb, const std::string& json,
                        const CollectionValue& expect) {
     MemTracker tracker(1024 * 1024, "ArrayParserTest");
     MemPool mem_pool(&tracker);
-    std::unique_ptr<FunctionContext> function_context(new FunctionContext());
-    function_context->impl()->_return_type = function_type_desc;
-    function_context->impl()->_pool = new FreePool(&mem_pool);
-    CollectionVal collection_val;
-    auto status =
-            ArrayParser::parse(collection_val, function_context.get(), 
StringVal(json.c_str()));
-    EXPECT_TRUE(status.ok());
-    auto actual = CollectionValue::from_collection_val(collection_val);
-    EXPECT_TRUE(get_type_info(function_type_desc)->equal(&expect, &actual));
+    FunctionContext context;
+    ArrayUtils::prepare_context(context, mem_pool, column_pb);
+    CollectionValue actual;
+    auto status = ArrayUtils::create_collection_value(&actual, &context, json);
+    ASSERT_TRUE(status.ok());
+    EXPECT_TRUE(get_type_info(column_pb)->equal(&expect, &actual));
 }
 
 TEST(ArrayParserTest, TestParseIntArray) {
-    auto function_type_desc =
-            create_function_type_desc(FunctionContext::TYPE_ARRAY, 
FunctionContext::TYPE_INT);
-    test_array_parser(function_type_desc, "[]", CollectionValue(0));
+    auto column_pb = create_column_pb("ARRAY", "INT");
+    test_array_parser(column_pb, "[]", CollectionValue(0));
 
-    int num_items = 3;
-    std::unique_ptr<int32_t[]> data(new int32_t[num_items] {1, 2, 3});
-    CollectionValue value(data.get(), num_items, false, nullptr);
-    test_array_parser(function_type_desc, "[1, 2, 3]", value);
+    int32_t data[] = {1, 2, 3};
+    int num_items = sizeof(data) / sizeof(data[0]);
+    CollectionValue value(data, num_items, false, nullptr);
+    test_array_parser(column_pb, "[1, 2, 3]", value);
 
-    std::unique_ptr<bool[]> null_signs(new bool[num_items] {false, true, 
false});
+    bool null_signs[] = {false, true, false};
     value.set_has_null(true);
-    value.set_null_signs(null_signs.get());
-    test_array_parser(function_type_desc, "[1, null, 3]", value);
+    value.set_null_signs(null_signs);
+    test_array_parser(column_pb, "[1, null, 3]", value);
 }
 
 TEST(ArrayParserTest, TestParseVarcharArray) {
-    auto function_type_desc =
-            create_function_type_desc(FunctionContext::TYPE_ARRAY, 
FunctionContext::TYPE_VARCHAR);
-    test_array_parser(function_type_desc, "[]", CollectionValue(0));
+    auto column_pb = create_column_pb("ARRAY", "VARCHAR");
+    test_array_parser(column_pb, "[]", CollectionValue(0));
 
-    int num_items = 3;
-    std::unique_ptr<char[]> data(new char[num_items] {'a', 'b', 'c'});
-    std::unique_ptr<StringValue[]> string_values(new StringValue[num_items] {
+    char data[] = {'a', 'b', 'c'};
+    int num_items = sizeof(data) / sizeof(data[0]);
+    StringValue string_values[] = {
             {&data[0], 1},
             {&data[1], 1},
             {&data[2], 1},
-    });
-    CollectionValue value(string_values.get(), num_items, false, nullptr);
-    test_array_parser(function_type_desc, "[\"a\", \"b\", \"c\"]", value);
+    };
+    CollectionValue value(string_values, num_items, false, nullptr);
+    test_array_parser(column_pb, "[\"a\", \"b\", \"c\"]", value);
 
-    std::unique_ptr<bool[]> null_signs(new bool[num_items] {false, true, 
false});
+    bool null_signs[] = {false, true, false};
     value.set_has_null(true);
-    value.set_null_signs(null_signs.get());
-    test_array_parser(function_type_desc, "[\"a\", null, \"c\"]", value);
+    value.set_null_signs(null_signs);
+    test_array_parser(column_pb, "[\"a\", null, \"c\"]", value);
+}
+
+TEST(ArrayParserTest, TestNestedArray) {
+    auto column_pb = create_column_pb("ARRAY", "ARRAY", "INT");
+    test_array_parser(column_pb, "[]", CollectionValue(0));
+
+    CollectionValue empty_array(0);
+    test_array_parser(column_pb, "[[]]", {&empty_array, 1, false, nullptr});
+
+    int data[] = {1, 0, 3};
+    uint32_t num_items = sizeof(data) / sizeof(data[0]);
+    bool null_signs[] = {false, true, false};
+    CollectionValue array = {data, num_items, true, null_signs};
+
+    CollectionValue array_data[] = {empty_array, array, empty_array, array};
+    uint32_t num_arrays = sizeof(array_data) / sizeof(array_data[0]);
+    test_array_parser(column_pb, "[[], [1, null, 3], [], [1, null, 3]]",
+                      {array_data, num_arrays, false, nullptr});
+    bool array_null_signs[] = {false, true, true, false};
+    test_array_parser(column_pb, "[[], null, null, [1, null, 3]]",
+                      {array_data, num_arrays, true, array_null_signs});
 }
 
 } // namespace doris

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to