HappenLee commented on a change in pull request #8494:
URL: https://github.com/apache/incubator-doris/pull/8494#discussion_r827780130



##########
File path: be/src/vec/core/block.cpp
##########
@@ -701,63 +702,102 @@ doris::Tuple* Block::deep_copy_tuple(const 
doris::TupleDescriptor& desc, MemPool
 
     for (int i = 0; i < desc.slots().size(); ++i) {
         auto slot_desc = desc.slots()[i];
-        auto data_ref = get_by_position(column_offset + 
i).column->get_data_at(row);
-
-        if (data_ref.data == nullptr) {
+        auto column = get_by_position(column_offset + i).column;
+        Field field;
+        column->get(row, field);
+        if (field.is_null()) {
             dst->set_null(slot_desc->null_indicator_offset());
-            continue;
         } else {
             dst->set_not_null(slot_desc->null_indicator_offset());
+            deep_copy_slot(dst->get_slot(slot_desc->tuple_offset()), 
slot_desc->type(), pool,
+                           column.get(), row, padding_char);
         }
+    }
+    return dst;
+}
 
-        if (!slot_desc->type().is_string_type() && 
!slot_desc->type().is_date_type()) {
-            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), 
data_ref.data, data_ref.size);
-        } else if (slot_desc->type().is_string_type() && slot_desc->type() != 
TYPE_OBJECT &&
-                   slot_desc->type() != TYPE_HLL) {
-            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const 
void*)(&data_ref),
-                   sizeof(data_ref));
-            // Copy the content of string
-            if (padding_char && slot_desc->type() == TYPE_CHAR) {
-                // serialize the content of string
-                auto string_slot = 
dst->get_string_slot(slot_desc->tuple_offset());
-                string_slot->ptr = 
reinterpret_cast<char*>(pool->allocate(slot_desc->type().len));
-                string_slot->len = slot_desc->type().len;
-                memset(string_slot->ptr, 0, slot_desc->type().len);
-                memcpy(string_slot->ptr, data_ref.data, data_ref.size);
-            } else {
-                auto str_ptr = pool->allocate(data_ref.size);
-                memcpy(str_ptr, data_ref.data, data_ref.size);
-                dst->get_string_slot(slot_desc->tuple_offset())->ptr =
-                        reinterpret_cast<char*>(str_ptr);
-            }
-        } else if (slot_desc->type() == TYPE_OBJECT) {
-            auto bitmap_value = (BitmapValue*)(data_ref.data);
-            auto size = bitmap_value->getSizeInBytes();
-
+void Block::deep_copy_slot(void* dst, const doris::TypeDescriptor& type_desc, 
MemPool* pool,
+                           const IColumn* column, int row, bool padding_char) {

Review comment:
       The overall code abstraction is not good. If other types need to use 
`get_ data_at` method, you should use additional code to deal with the logic of 
array

##########
File path: be/src/vec/exprs/vliteral.cpp
##########
@@ -24,107 +24,127 @@
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/runtime/vdatetime_value.h"
 namespace doris::vectorized {
-VLiteral::VLiteral(const TExprNode& node) : VExpr(node) {
+VLiteral::VLiteral(const TExprNode& node) : VExpr(node), 
_expr_name(_data_type->get_name()) {
     Field field;
     if (node.node_type != TExprNodeType::NULL_LITERAL) {
         switch (_type.type) {
-            case TYPE_BOOLEAN: {
-                DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL);
-                DCHECK(node.__isset.bool_literal);
-                field = Int8(node.bool_literal.value);
-                break;
-            }
-            case TYPE_TINYINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int8(node.int_literal.value);
-                break;
-            }
-            case TYPE_SMALLINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int16(node.int_literal.value);
-                break;
-            }
-            case TYPE_INT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int32(node.int_literal.value);
-                break;
-            }
-            case TYPE_BIGINT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
-                DCHECK(node.__isset.int_literal);
-                field = Int64(node.int_literal.value);
-                break;
-            }
-            case TYPE_LARGEINT: {
-                StringParser::ParseResult parse_result = 
StringParser::PARSE_SUCCESS;
-                DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL);
-                __int128_t value = StringParser::string_to_int<__int128>(
-                        node.large_int_literal.value.c_str(), 
node.large_int_literal.value.size(),
-                        &parse_result);
-                if (parse_result != StringParser::PARSE_SUCCESS) {
-                    value = MAX_INT128;
-                }
-                field = Int128(value);
-                break;
-            }
-            case TYPE_FLOAT: {
-                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
-                DCHECK(node.__isset.float_literal);
-                field = Float32(node.float_literal.value);
-                break;
-            }
-            case TYPE_TIME:
-            case TYPE_DOUBLE: {
-                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
-                DCHECK(node.__isset.float_literal);
-                field = Float64(node.float_literal.value);
-                break;
-            }
-            case TYPE_DATE: {
-                VecDateTimeValue value;
-                value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
-                value.cast_to_date();
-                field = Int64(*reinterpret_cast<__int64_t*>(&value));
-                break;
-            }
-            case TYPE_DATETIME: {
-                VecDateTimeValue value;
-                value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
-                value.to_datetime();
-                field = Int64(*reinterpret_cast<__int64_t*>(&value));
-                break;
-            }
-            case TYPE_STRING:
-            case TYPE_CHAR:
-            case TYPE_VARCHAR: {
-                DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
-                DCHECK(node.__isset.string_literal);
-                field = node.string_literal.value;
-                break;
-            }
-            case TYPE_DECIMALV2: {
-                DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL);
-                DCHECK(node.__isset.decimal_literal);
-                DecimalV2Value value(node.decimal_literal.value);
-                field = DecimalField<Decimal128>(value.value(), value.scale());
-                break;
-            }
-            default: {
-                DCHECK(false) << "Invalid type: " << _type.type;
-                break;
+        case TYPE_BOOLEAN: {
+            DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL);
+            DCHECK(node.__isset.bool_literal);
+            field = Int8(node.bool_literal.value);
+            break;
+        }
+        case TYPE_TINYINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int8(node.int_literal.value);
+            break;
+        }
+        case TYPE_SMALLINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int16(node.int_literal.value);
+            break;
+        }
+        case TYPE_INT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int32(node.int_literal.value);
+            break;
+        }
+        case TYPE_BIGINT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+            DCHECK(node.__isset.int_literal);
+            field = Int64(node.int_literal.value);
+            break;
+        }
+        case TYPE_LARGEINT: {
+            StringParser::ParseResult parse_result = 
StringParser::PARSE_SUCCESS;
+            DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL);
+            __int128_t value = StringParser::string_to_int<__int128>(
+                    node.large_int_literal.value.c_str(), 
node.large_int_literal.value.size(),
+                    &parse_result);
+            if (parse_result != StringParser::PARSE_SUCCESS) {
+                value = MAX_INT128;
             }
+            field = Int128(value);
+            break;
+        }
+        case TYPE_FLOAT: {
+            DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+            DCHECK(node.__isset.float_literal);
+            field = Float32(node.float_literal.value);
+            break;
+        }
+        case TYPE_TIME:
+        case TYPE_DOUBLE: {
+            DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+            DCHECK(node.__isset.float_literal);
+            field = Float64(node.float_literal.value);
+            break;
+        }
+        case TYPE_DATE: {
+            VecDateTimeValue value;
+            value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
+            value.cast_to_date();
+            field = Int64(*reinterpret_cast<__int64_t*>(&value));
+            break;
+        }
+        case TYPE_DATETIME: {
+            VecDateTimeValue value;
+            value.from_date_str(node.date_literal.value.c_str(), 
node.date_literal.value.size());
+            value.to_datetime();
+            field = Int64(*reinterpret_cast<__int64_t*>(&value));
+            break;
+        }
+        case TYPE_STRING:
+        case TYPE_CHAR:
+        case TYPE_VARCHAR: {
+            DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
+            DCHECK(node.__isset.string_literal);
+            field = node.string_literal.value;
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL);
+            DCHECK(node.__isset.decimal_literal);
+            DecimalV2Value value(node.decimal_literal.value);
+            field = DecimalField<Decimal128>(value.value(), value.scale());
+            break;
+        }
+        case TYPE_ARRAY: {
+            DCHECK_EQ(node.node_type, TExprNodeType::ARRAY_LITERAL);
+            // init in prepare

Review comment:
       why init in prepare, look strange?

##########
File path: be/src/vec/data_types/data_type_factory.cpp
##########
@@ -32,7 +32,7 @@ DataTypePtr DataTypeFactory::create_data_type(const 
doris::Field& col_desc) {
     }
 
     if (col_desc.is_nullable() && nested) {
-        return std::make_shared<DataTypeNullable>(std::move(nested));
+        return std::make_shared<DataTypeNullable>(nested);

Review comment:
       why do not use `std::move` ?

##########
File path: be/src/olap/row_block2.cpp
##########
@@ -349,11 +354,13 @@ Status RowBlockV2::_copy_data_to_column(int cid, 
doris::vectorized::MutableColum
     return Status::OK();
 }
 
-Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, 
uint16_t off, uint16_t len, doris::vectorized::MutableColumnPtr& origin_column) 
{
+Status RowBlockV2::_append_data_to_column(const ColumnVectorBatch* batch, 
size_t start,

Review comment:
       What is the motivation here to change uint16 to uint32?

##########
File path: be/src/vec/columns/column_array.cpp
##########
@@ -311,7 +311,7 @@ void ColumnArray::insert_range_from(const IColumn & src, 
size_t start, size_t le
         cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
     } else {
         size_t old_size = cur_offsets.size();
-        size_t prev_max_offset = old_size ? cur_offsets.back() : 0;

Review comment:
       ?cur_offsets is empty still get `cur_offesets.back()`

##########
File path: be/test/test_util/array_utils.cpp
##########
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "test_util/array_utils.h"
+
+#include "common/status.h"
+#include "exprs/anyval_util.h"
+#include "gen_cpp/olap_file.pb.h"
+#include "runtime/collection_value.h"
+#include "runtime/free_pool.hpp"
+#include "runtime/mem_pool.h"
+#include "runtime/mem_tracker.h"
+#include "udf/udf_internal.h"
+#include "util/array_parser.hpp"
+
+namespace doris {
+
+using TypeDesc = FunctionContext::TypeDesc;
+
+void ArrayUtils::prepare_context(FunctionContext& context, MemPool& mem_pool,
+                                 const ColumnPB& column_pb) {
+    auto function_type_desc = create_function_type_desc(column_pb);
+    context.impl()->_return_type = function_type_desc;
+    context.impl()->_pool = new FreePool(&mem_pool);
+}
+
+Status ArrayUtils::create_collection_value(CollectionValue* collection_value,
+                                           FunctionContext* context,
+                                           const std::string& json_string) {
+    CollectionVal collection_val;
+    auto status = ArrayParser::parse(collection_val, context, 
StringVal(json_string.c_str()));
+    if (!status.ok()) {
+        return status;
+    }
+    new (collection_value) CollectionValue(collection_val.data, 
collection_val.length,
+                                           collection_val.has_null, 
collection_val.null_signs);
+    return Status::OK();
+}
+
+TypeDesc ArrayUtils::create_function_type_desc(const ColumnPB& column_pb) {
+    TypeDesc type_desc;
+    type_desc.len = column_pb.length();
+    type_desc.precision = column_pb.precision();
+    type_desc.scale = column_pb.frac();
+    if (column_pb.type() == "ARRAY") {

Review comment:
       only 3 types ?

##########
File path: be/test/test_util/array_utils.cpp
##########
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "test_util/array_utils.h"
+
+#include "common/status.h"
+#include "exprs/anyval_util.h"
+#include "gen_cpp/olap_file.pb.h"
+#include "runtime/collection_value.h"
+#include "runtime/free_pool.hpp"
+#include "runtime/mem_pool.h"
+#include "runtime/mem_tracker.h"
+#include "udf/udf_internal.h"
+#include "util/array_parser.hpp"
+
+namespace doris {
+
+using TypeDesc = FunctionContext::TypeDesc;
+
+void ArrayUtils::prepare_context(FunctionContext& context, MemPool& mem_pool,
+                                 const ColumnPB& column_pb) {
+    auto function_type_desc = create_function_type_desc(column_pb);
+    context.impl()->_return_type = function_type_desc;
+    context.impl()->_pool = new FreePool(&mem_pool);
+}
+
+Status ArrayUtils::create_collection_value(CollectionValue* collection_value,
+                                           FunctionContext* context,
+                                           const std::string& json_string) {
+    CollectionVal collection_val;
+    auto status = ArrayParser::parse(collection_val, context, 
StringVal(json_string.c_str()));
+    if (!status.ok()) {
+        return status;
+    }
+    new (collection_value) CollectionValue(collection_val.data, 
collection_val.length,

Review comment:
       why use place new ? ArrayParser::parse `collection_value` not 
`collection_val`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to