github-actions[bot] commented on code in PR #42269:
URL: https://github.com/apache/doris/pull/42269#discussion_r1840391583


##########
be/test/vec/columns/common_column_test.h:
##########
@@ -0,0 +1,1682 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <fstream>
+
+#include "olap/schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/sort_block.h"
+#include "vec/core/sort_description.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_map.h"
+
+// This file is a common test template for all column types: each column's unit tests should
+// cover the functions defined in column.
+// For example, column_array should test these functions:
+// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized,
+// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec,
+// get_max_row_byte_size
+//
+namespace doris::vectorized {
+
+static bool gen_check_data_in_assert = false;
+
+class CommonColumnTest : public ::testing::Test {
+protected:
+    // Loads a delimited text file into `columns`.
+    // Every line of `file_path` is split on `spliter`; each field whose 0-based position is in
+    // `idxes` is passed to deserialize_one_cell_from_json of the next serde and appended to the
+    // next column, i.e. the k-th selected field of a line goes to columns[k] via serders[k].
+    // A cell that fails to deserialize is only reported on stdout and loading continues.
+    // Throws doris::Exception(INVALID_ARGUMENT) if the file cannot be opened.
+    void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns,
+                            const std::string& file_path, const char spliter = ';',
+                            const std::set<int> idxes = {0}) {
+        ASSERT_EQ(serders.size(), columns.size())
+                << "serder size: " << serders.size() << " column size: " << columns.size();
+        // One serde/column per selected field. `idxes` is a set, so at most idxes.size() fields
+        // are selected per line and c_idx below always stays smaller than serders.size().
+        ASSERT_EQ(serders.size(), idxes.size())
+                << "serder size: " << serders.size() << " idxes size: " << idxes.size();
+        // Do not assert on *idxes.end(): end() is a past-the-end iterator and dereferencing it
+        // is undefined behavior.
+        std::ifstream file(file_path);
+        if (!file) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ",
+                                   file_path);
+        }
+
+        std::string line;
+        DataTypeSerDe::FormatOptions options;
+        while (std::getline(file, line)) {
+            std::stringstream lineStream(line);
+            std::cout << "whole : " << lineStream.str() << std::endl;
+            std::string value;
+            int l_idx = 0; // position of the current field within the line
+            int c_idx = 0; // position of the target column/serde
+            while (std::getline(lineStream, value, spliter)) {
+                if (idxes.contains(l_idx)) {
+                    Slice string_slice(value.data(), value.size());
+                    std::cout << string_slice << std::endl;
+                    if (auto st = serders[c_idx]->deserialize_one_cell_from_json(
+                                *columns[c_idx], string_slice, options);
+                        !st.ok()) {
+                        // Best effort on purpose: report the bad cell and keep loading.
+                        std::cout << "error in deserialize but continue: " << st.to_string()
+                                  << std::endl;
+                    }
+                    ++c_idx;
+                }
+                ++l_idx;
+            }
+        }
+    }
+
+    // Generates or verifies the golden result file "./res_<function_name>.csv" for `res`.
+    //  - gen_check_data_in_assert == true : `res` is written to the file, one row per line,
+    //    cells joined with ';'. Throws std::ios_base::failure if the file cannot be opened.
+    //  - gen_check_data_in_assert == false: the file is read back, split on ';', and every
+    //    cell of `res` is compared with the cell at the same position in the file.
+    //    Throws doris::Exception(INVALID_ARGUMENT) if the file cannot be opened.
+    // The assert callback of a test decides what goes into `res`; a freshly generated file
+    // should be reviewed to make sure it satisfies the semantics of the function under test.
+    static void check_res_file(string function_name, std::vector<std::vector<std::string>>& res) {
+        string filename = "./res_" + function_name + ".csv";
+        if (gen_check_data_in_assert) {
+            std::ofstream res_file(filename);
+            std::cout << "gen check data: " << res.size() << " with file: " << filename
+                      << std::endl;
+            if (!res_file.is_open()) {
+                throw std::ios_base::failure("Failed to open file.");
+            }
+
+            for (const auto& row : res) {
+                for (size_t i = 0; i < row.size(); ++i) {
+                    res_file << row[i];
+                    if (i + 1 < row.size()) {
+                        res_file << ";"; // Add semicolon between columns
+                    }
+                }
+                res_file << "\n"; // Newline after each row
+            }
+
+            res_file.close();
+        } else {
+            // we read the generated file to check the result
+            std::cout << "check data: " << res.size() << " with file: " << filename << std::endl;
+            std::ifstream file(filename);
+            if (!file) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ",
+                                       filename);
+            }
+
+            std::string line;
+            std::vector<std::vector<std::string>> assert_res;
+            while (std::getline(file, line)) {
+                std::vector<std::string> row;
+                std::stringstream lineStream(line);
+                std::string value;
+                while (std::getline(lineStream, value, ';')) {
+                    row.push_back(value);
+                }
+                assert_res.push_back(row);
+            }
+
+            // The expected file must provide at least as many rows/cells as `res`; otherwise
+            // the indexing below would read out of bounds instead of failing the test.
+            ASSERT_LE(res.size(), assert_res.size())
+                    << "expected file " << filename << " has fewer rows than the result";
+            for (size_t i = 0; i < res.size(); ++i) {
+                ASSERT_LE(res[i].size(), assert_res[i].size())
+                        << "expected file " << filename << " has fewer cells in row " << i;
+                for (size_t j = 0; j < res[i].size(); ++j) {
+                    EXPECT_EQ(res[i][j], assert_res[i][j]);
+                }
+            }
+        }
+    }
+
+public:
+    // Builds the small fixture columns shared by the tests; every column gets three rows.
+    void SetUp() override {
+        // strings: "aaa", "bb", "cccc"
+        col_str = ColumnString::create();
+        col_str->insert_data("aaa", 3);
+        col_str->insert_data("bb", 2);
+        col_str->insert_data("cccc", 4);
+
+        // int64: 1, 2, 3
+        col_int = ColumnInt64::create();
+        for (int number : {1, 2, 3}) {
+            col_int->insert_value(number);
+        }
+
+        // decimal64 column created with (0, 3)
+        col_dcm = ColumnDecimal64::create(0, 3);
+        for (double number : {1.23, 4.56, 7.89}) {
+            col_dcm->insert_value(number);
+        }
+
+        // array<int64>: [1, 2, 3], [], [4]
+        col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create());
+        Array three_items = {1, 2, 3};
+        Array one_item = {4};
+        col_arr->insert(three_items);
+        col_arr->insert(Array());
+        col_arr->insert(one_item);
+
+        // map<string, int64>: {a:1, b:2, c:3}, {}, {d:4}
+        // A Map field is pushed as two arrays: keys first, then values.
+        col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(),
+                                    ColumnArray::ColumnOffsets::create());
+        Array abc_keys = {"a", "b", "c"};
+        Array abc_values = {1, 2, 3};
+        Array d_keys = {"d"};
+        Array d_values = {4};
+        Array no_items = Array();
+
+        Map abc_map;
+        abc_map.push_back(abc_keys);
+        abc_map.push_back(abc_values);
+        col_map->insert(abc_map);
+
+        Map empty_map;
+        empty_map.push_back(no_items);
+        empty_map.push_back(no_items);
+        col_map->insert(empty_map);
+
+        Map d_map;
+        d_map.push_back(d_keys);
+        d_map.push_back(d_values);
+        col_map->insert(d_map);
+    }
+
+    ColumnString::MutablePtr col_str;
+    ColumnInt64::MutablePtr col_int;
+    ColumnDecimal64::MutablePtr col_dcm;
+    ColumnArray::MutablePtr col_arr;
+    ColumnMap::MutablePtr col_map;
+
+    
////==================================================================================================================
+    // Common driver usable by all column tests: load data, then let the caller verify it.
+    //  step1. the fields selected by `idxes` are loaded from `column_data_file` (split on
+    //         `col_spliter`) into `columns` through `serders`.
+    //  step2. `assert_callback(columns, serders)` is invoked to check the loaded data. The
+    //         callback may use check_res_file(), which writes a result file when
+    //         gen_check_data_in_assert is true and otherwise compares against that file.
+    void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter,
+                    std::set<int> idxes, const std::string& column_data_file,
+                    std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders)>
+                            assert_callback) {
+        ASSERT_EQ(serders.size(), columns.size());
+        // Step 1: Insert data from `column_data_file` into the columns
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+
+        // Step 2: Let the callback validate the loaded columns
+        assert_callback(columns, serders);
+    }
+    
////==================================================================================================================
+
+    // Loads the columns under test and the expected columns from two delimited files and
+    // passes both to `assert_callback` for comparison.
+    //  - `columns` is filled from `column_data_file` using `serders`, `col_spliter`, `idxes`.
+    //  - `check_columns` is filled from `check_data_file` using `check_serders`,
+    //    `check_col_spliter`, `check_idxes`.
+    //  - `assert_callback(columns, check_columns, dataTypes)` performs the actual checks.
+    void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes,
+                       char col_spliter, std::set<int> idxes, const std::string& column_data_file,
+                       const std::string& check_data_file, MutableColumns& check_columns,
+                       DataTypeSerDeSPtrs check_serders, char check_col_spliter,
+                       std::set<int> check_idxes,
+                       std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns,
+                                          DataTypes dataTypes)>
+                               assert_callback) {
+        // Step 1: Load column data and expected data from CSV files. The expected file has its
+        // own separator and field selection.
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+        load_data_from_csv(check_serders, check_columns, check_data_file, check_col_spliter,
+                           check_idxes);
+
+        // Step 2: Validate the data in `columns` against `check_columns`
+        assert_callback(columns, check_columns, dataTypes);
+    }
+
+    static void checkColumn(const IColumn& col1, const IColumn& col2, const 
IDataType& dataType,

Review Comment:
   warning: function 'checkColumn' exceeds recommended size/complexity 
thresholds [readability-function-size]
   ```cpp
       static void checkColumn(const IColumn& col1, const IColumn& col2, const 
IDataType& dataType,
                   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/columns/common_column_test.h:231:** 118 lines including 
whitespace and comments (threshold 80)
   ```cpp
       static void checkColumn(const IColumn& col1, const IColumn& col2, const 
IDataType& dataType,
                   ^
   ```
   
   </details>
   



##########
be/test/vec/columns/common_column_test.h:
##########
@@ -0,0 +1,1682 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <fstream>
+
+#include "olap/schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/sort_block.h"
+#include "vec/core/sort_description.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_map.h"
+
+// This file is a common test template for all column types: each column's unit tests should
+// cover the functions defined in column.
+// For example, column_array should test these functions:
+// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized,
+// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec,
+// get_max_row_byte_size
+//
+namespace doris::vectorized {
+
+static bool gen_check_data_in_assert = false;
+
+class CommonColumnTest : public ::testing::Test {
+protected:
+    // Loads a delimited text file into `columns`.
+    // Every line of `file_path` is split on `spliter`; each field whose 0-based position is in
+    // `idxes` is passed to deserialize_one_cell_from_json of the next serde and appended to the
+    // next column, i.e. the k-th selected field of a line goes to columns[k] via serders[k].
+    // A cell that fails to deserialize is only reported on stdout and loading continues.
+    // Throws doris::Exception(INVALID_ARGUMENT) if the file cannot be opened.
+    void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns,
+                            const std::string& file_path, const char spliter = ';',
+                            const std::set<int> idxes = {0}) {
+        ASSERT_EQ(serders.size(), columns.size())
+                << "serder size: " << serders.size() << " column size: " << columns.size();
+        // One serde/column per selected field. `idxes` is a set, so at most idxes.size() fields
+        // are selected per line and c_idx below always stays smaller than serders.size().
+        ASSERT_EQ(serders.size(), idxes.size())
+                << "serder size: " << serders.size() << " idxes size: " << idxes.size();
+        // Do not assert on *idxes.end(): end() is a past-the-end iterator and dereferencing it
+        // is undefined behavior.
+        std::ifstream file(file_path);
+        if (!file) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ",
+                                   file_path);
+        }
+
+        std::string line;
+        DataTypeSerDe::FormatOptions options;
+        while (std::getline(file, line)) {
+            std::stringstream lineStream(line);
+            std::cout << "whole : " << lineStream.str() << std::endl;
+            std::string value;
+            int l_idx = 0; // position of the current field within the line
+            int c_idx = 0; // position of the target column/serde
+            while (std::getline(lineStream, value, spliter)) {
+                if (idxes.contains(l_idx)) {
+                    Slice string_slice(value.data(), value.size());
+                    std::cout << string_slice << std::endl;
+                    if (auto st = serders[c_idx]->deserialize_one_cell_from_json(
+                                *columns[c_idx], string_slice, options);
+                        !st.ok()) {
+                        // Best effort on purpose: report the bad cell and keep loading.
+                        std::cout << "error in deserialize but continue: " << st.to_string()
+                                  << std::endl;
+                    }
+                    ++c_idx;
+                }
+                ++l_idx;
+            }
+        }
+    }
+
+    // Generates or verifies the golden result file "./res_<function_name>.csv" for `res`.
+    //  - gen_check_data_in_assert == true : `res` is written to the file, one row per line,
+    //    cells joined with ';'. Throws std::ios_base::failure if the file cannot be opened.
+    //  - gen_check_data_in_assert == false: the file is read back, split on ';', and every
+    //    cell of `res` is compared with the cell at the same position in the file.
+    //    Throws doris::Exception(INVALID_ARGUMENT) if the file cannot be opened.
+    // The assert callback of a test decides what goes into `res`; a freshly generated file
+    // should be reviewed to make sure it satisfies the semantics of the function under test.
+    static void check_res_file(string function_name, std::vector<std::vector<std::string>>& res) {
+        string filename = "./res_" + function_name + ".csv";
+        if (gen_check_data_in_assert) {
+            std::ofstream res_file(filename);
+            std::cout << "gen check data: " << res.size() << " with file: " << filename
+                      << std::endl;
+            if (!res_file.is_open()) {
+                throw std::ios_base::failure("Failed to open file.");
+            }
+
+            for (const auto& row : res) {
+                for (size_t i = 0; i < row.size(); ++i) {
+                    res_file << row[i];
+                    if (i + 1 < row.size()) {
+                        res_file << ";"; // Add semicolon between columns
+                    }
+                }
+                res_file << "\n"; // Newline after each row
+            }
+
+            res_file.close();
+        } else {
+            // we read the generated file to check the result
+            std::cout << "check data: " << res.size() << " with file: " << filename << std::endl;
+            std::ifstream file(filename);
+            if (!file) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ",
+                                       filename);
+            }
+
+            std::string line;
+            std::vector<std::vector<std::string>> assert_res;
+            while (std::getline(file, line)) {
+                std::vector<std::string> row;
+                std::stringstream lineStream(line);
+                std::string value;
+                while (std::getline(lineStream, value, ';')) {
+                    row.push_back(value);
+                }
+                assert_res.push_back(row);
+            }
+
+            // The expected file must provide at least as many rows/cells as `res`; otherwise
+            // the indexing below would read out of bounds instead of failing the test.
+            ASSERT_LE(res.size(), assert_res.size())
+                    << "expected file " << filename << " has fewer rows than the result";
+            for (size_t i = 0; i < res.size(); ++i) {
+                ASSERT_LE(res[i].size(), assert_res[i].size())
+                        << "expected file " << filename << " has fewer cells in row " << i;
+                for (size_t j = 0; j < res[i].size(); ++j) {
+                    EXPECT_EQ(res[i][j], assert_res[i][j]);
+                }
+            }
+        }
+    }
+
+public:
+    // Builds the small fixture columns shared by the tests; every column gets three rows.
+    void SetUp() override {
+        // strings: "aaa", "bb", "cccc"
+        col_str = ColumnString::create();
+        col_str->insert_data("aaa", 3);
+        col_str->insert_data("bb", 2);
+        col_str->insert_data("cccc", 4);
+
+        // int64: 1, 2, 3
+        col_int = ColumnInt64::create();
+        for (int number : {1, 2, 3}) {
+            col_int->insert_value(number);
+        }
+
+        // decimal64 column created with (0, 3)
+        col_dcm = ColumnDecimal64::create(0, 3);
+        for (double number : {1.23, 4.56, 7.89}) {
+            col_dcm->insert_value(number);
+        }
+
+        // array<int64>: [1, 2, 3], [], [4]
+        col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create());
+        Array three_items = {1, 2, 3};
+        Array one_item = {4};
+        col_arr->insert(three_items);
+        col_arr->insert(Array());
+        col_arr->insert(one_item);
+
+        // map<string, int64>: {a:1, b:2, c:3}, {}, {d:4}
+        // A Map field is pushed as two arrays: keys first, then values.
+        col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(),
+                                    ColumnArray::ColumnOffsets::create());
+        Array abc_keys = {"a", "b", "c"};
+        Array abc_values = {1, 2, 3};
+        Array d_keys = {"d"};
+        Array d_values = {4};
+        Array no_items = Array();
+
+        Map abc_map;
+        abc_map.push_back(abc_keys);
+        abc_map.push_back(abc_values);
+        col_map->insert(abc_map);
+
+        Map empty_map;
+        empty_map.push_back(no_items);
+        empty_map.push_back(no_items);
+        col_map->insert(empty_map);
+
+        Map d_map;
+        d_map.push_back(d_keys);
+        d_map.push_back(d_values);
+        col_map->insert(d_map);
+    }
+
+    ColumnString::MutablePtr col_str;
+    ColumnInt64::MutablePtr col_int;
+    ColumnDecimal64::MutablePtr col_dcm;
+    ColumnArray::MutablePtr col_arr;
+    ColumnMap::MutablePtr col_map;
+
+    
////==================================================================================================================
+    // Common driver usable by all column tests: load data, then let the caller verify it.
+    //  step1. the fields selected by `idxes` are loaded from `column_data_file` (split on
+    //         `col_spliter`) into `columns` through `serders`.
+    //  step2. `assert_callback(columns, serders)` is invoked to check the loaded data. The
+    //         callback may use check_res_file(), which writes a result file when
+    //         gen_check_data_in_assert is true and otherwise compares against that file.
+    void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter,
+                    std::set<int> idxes, const std::string& column_data_file,
+                    std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders)>
+                            assert_callback) {
+        ASSERT_EQ(serders.size(), columns.size());
+        // Step 1: Insert data from `column_data_file` into the columns
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+
+        // Step 2: Let the callback validate the loaded columns
+        assert_callback(columns, serders);
+    }
+    
////==================================================================================================================
+
+    // Loads the columns under test and the expected columns from two delimited files and
+    // passes both to `assert_callback` for comparison.
+    //  - `columns` is filled from `column_data_file` using `serders`, `col_spliter`, `idxes`.
+    //  - `check_columns` is filled from `check_data_file` using `check_serders`,
+    //    `check_col_spliter`, `check_idxes`.
+    //  - `assert_callback(columns, check_columns, dataTypes)` performs the actual checks.
+    void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes,
+                       char col_spliter, std::set<int> idxes, const std::string& column_data_file,
+                       const std::string& check_data_file, MutableColumns& check_columns,
+                       DataTypeSerDeSPtrs check_serders, char check_col_spliter,
+                       std::set<int> check_idxes,
+                       std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns,
+                                          DataTypes dataTypes)>
+                               assert_callback) {
+        // Step 1: Load column data and expected data from CSV files. The expected file has its
+        // own separator and field selection.
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+        load_data_from_csv(check_serders, check_columns, check_data_file, check_col_spliter,
+                           check_idxes);
+
+        // Step 2: Validate the data in `columns` against `check_columns`
+        assert_callback(columns, check_columns, dataTypes);
+    }
+
+    // Compares the first `column_size` elements of `col1` and `col2` with EXPECT_EQ, where both
+    // columns are expected to be of the concrete type `ColumnType`. Fails the test (instead of
+    // dereferencing a null pointer) when either column is not a `ColumnType`.
+    template <typename ColumnType>
+    static void checkColumnElements(const IColumn& col1, const IColumn& col2,
+                                    size_t column_size) {
+        const auto* lhs = check_and_get_column<ColumnType>(col1);
+        const auto* rhs = check_and_get_column<ColumnType>(col2);
+        ASSERT_TRUE(lhs != nullptr) << "col1 does not match the column type implied by dataType";
+        ASSERT_TRUE(rhs != nullptr) << "col2 does not match the column type implied by dataType";
+        for (size_t i = 0; i < column_size; ++i) {
+            EXPECT_EQ(lhs->get_element(i), rhs->get_element(i));
+        }
+    }
+
+    // Checks that `col1` and `col2` hold equal data, interpreting both according to `dataType`.
+    //  - map: keys and values of the two maps are compared recursively with the key/value type
+    //    of `dataType`; the sizes of col1's nested key/value columns are used and `column_size`
+    //    is ignored.
+    //  - integer, float and decimal types: the first `column_size` elements are compared via
+    //    get_element() on the matching concrete column type.
+    //  - anything else: the first `column_size` rows are compared via get_data_at().
+    static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType,
+                            size_t column_size) {
+        WhichDataType which(dataType);
+        if (which.is_map()) {
+            const auto* map1 = check_and_get_column<ColumnMap>(col1);
+            const auto* map2 = check_and_get_column<ColumnMap>(col2);
+            ASSERT_TRUE(map1 != nullptr) << "col1 is not a ColumnMap";
+            ASSERT_TRUE(map2 != nullptr) << "col2 is not a ColumnMap";
+            const auto& map_type = static_cast<const DataTypeMap&>(dataType);
+            checkColumn(map1->get_keys(), map2->get_keys(), *map_type.get_key_type(),
+                        map1->get_keys().size());
+            // Compare col1's values against col2's values (not col2 against itself).
+            checkColumn(map1->get_values(), map2->get_values(), *map_type.get_value_type(),
+                        map1->get_values().size());
+        } else if (which.is_int8()) {
+            checkColumnElements<ColumnInt8>(col1, col2, column_size);
+        } else if (which.is_int16()) {
+            checkColumnElements<ColumnInt16>(col1, col2, column_size);
+        } else if (which.is_int32()) {
+            checkColumnElements<ColumnInt32>(col1, col2, column_size);
+        } else if (which.is_int64()) {
+            checkColumnElements<ColumnInt64>(col1, col2, column_size);
+        } else if (which.is_int128()) {
+            checkColumnElements<ColumnInt128>(col1, col2, column_size);
+        } else if (which.is_float32()) {
+            checkColumnElements<ColumnFloat32>(col1, col2, column_size);
+        } else if (which.is_float64()) {
+            checkColumnElements<ColumnFloat64>(col1, col2, column_size);
+        } else if (which.is_uint8()) {
+            checkColumnElements<ColumnUInt8>(col1, col2, column_size);
+        } else if (which.is_uint16()) {
+            checkColumnElements<ColumnUInt16>(col1, col2, column_size);
+        } else if (which.is_uint32()) {
+            checkColumnElements<ColumnUInt32>(col1, col2, column_size);
+        } else if (which.is_uint64()) {
+            checkColumnElements<ColumnUInt64>(col1, col2, column_size);
+        } else if (which.is_decimal32()) {
+            // Decimals are dispatched by their concrete width. A generic is_decimal() branch
+            // casting to ColumnDecimal64 must not come first: the cast yields nullptr for every
+            // other decimal width and the width-specific branches would never be reached.
+            checkColumnElements<ColumnDecimal32>(col1, col2, column_size);
+        } else if (which.is_decimal64()) {
+            checkColumnElements<ColumnDecimal64>(col1, col2, column_size);
+        } else if (which.is_decimal128v2()) {
+            checkColumnElements<ColumnDecimal128V2>(col1, col2, column_size);
+        } else if (which.is_decimal128v3()) {
+            checkColumnElements<ColumnDecimal128V3>(col1, col2, column_size);
+        } else if (which.is_decimal256()) {
+            checkColumnElements<ColumnDecimal<Decimal256>>(col1, col2, column_size);
+        } else {
+            // Fallback for all remaining types: compare the raw data of each row.
+            for (size_t i = 0; i < column_size; ++i) {
+                EXPECT_EQ(col1.get_data_at(i), col2.get_data_at(i));
+            }
+        }
+    }
+
+    void printColumn(const IColumn& column, const IDataType& dataType) {

Review Comment:
   warning: function 'printColumn' has cognitive complexity of 96 (threshold 
50) [readability-function-cognitive-complexity]
   ```cpp
       void printColumn(const IColumn& column, const IDataType& dataType) {
            ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/columns/common_column_test.h:354:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
           if (WhichDataType(dataType).is_map()) {
           ^
   ```
   **be/test/vec/columns/common_column_test.h:360:** +1, nesting level 
increased to 1
   ```cpp
           } else if (WhichDataType(dataType).is_array()) {
                  ^
   ```
   **be/test/vec/columns/common_column_test.h:365:** +1, nesting level 
increased to 1
   ```cpp
           } else {
             ^
   ```
   **be/test/vec/columns/common_column_test.h:368:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
               if (WhichDataType(dataType).is_int8()) {
               ^
   ```
   **be/test/vec/columns/common_column_test.h:370:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:373:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int16()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:375:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:378:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:380:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:383:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:385:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:388:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int128()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:390:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:393:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_float32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:395:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:398:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_float64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:400:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:403:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint8()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:405:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:408:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint16()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:410:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:413:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:415:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:418:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:420:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:423:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint128()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:425:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:428:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:430:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:433:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:435:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:438:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:440:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:443:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal128v2()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:445:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:448:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal128v3()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:450:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:453:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal256()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:455:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:458:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_date()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:460:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:463:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_date_time()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:465:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:468:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_date_v2()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:470:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:473:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_date_time_v2()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:475:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:478:** +1, nesting level 
increased to 2
   ```cpp
               } else {
                 ^
   ```
   **be/test/vec/columns/common_column_test.h:481:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   
   </details>
   



##########
be/test/vec/columns/common_column_test.h:
##########
@@ -0,0 +1,1682 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <fstream>
+
+#include "olap/schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/sort_block.h"
+#include "vec/core/sort_description.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_map.h"
+
+// This test is intended to be a column test template for all columns, which should have unit tests covering the functions defined in column.
+// For example, column_array should test these functions:
+// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized,
+// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size
+//
+namespace doris::vectorized {
+
+static bool gen_check_data_in_assert = false;
+
+class CommonColumnTest : public ::testing::Test {
+protected:
+    // Helper function to load data from CSV: each line is split by spliter, and the fields at the given indexes are loaded into columns
+    void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& 
columns,
+                            const std::string& file_path, const char spliter = 
';',
+                            const std::set<int> idxes = {0}) {
+        ASSERT_EQ(serders.size(), columns.size())
+                << "serder size: " << serders.size() << " column size: " << 
columns.size();
+        ASSERT_EQ(serders.size(), idxes.size())
+                << "serder size: " << serders.size() << " idxes size: " << 
idxes.size();
+        ASSERT_EQ(serders.size(), *idxes.end())
+                << "serder size: " << serders.size() << " idxes size: " << 
*idxes.end();
+        std::ifstream file(file_path);
+        if (!file) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open 
the file: {} ",
+                                   file_path);
+        }
+
+        std::string line;
+        DataTypeSerDe::FormatOptions options;
+        while (std::getline(file, line)) {
+            std::stringstream lineStream(line);
+            std::cout << "whole : " << lineStream.str() << std::endl;
+            std::string value;
+            int l_idx = 0;
+            int c_idx = 0;
+            while (std::getline(lineStream, value, spliter)) {
+                if (idxes.contains(l_idx)) {
+                    Slice string_slice(value.data(), value.size());
+                    std::cout << string_slice << std::endl;
+                    if (auto st = 
serders[c_idx]->deserialize_one_cell_from_json(
+                                *columns[c_idx], string_slice, options);
+                        !st.ok()) {
+                        std::cout << "error in deserialize but continue: " << 
st.to_string()
+                                  << std::endl;
+                    }
+                    ++c_idx;
+                }
+                ++l_idx;
+            }
+        }
+    }
+
+    //// This is a very helpful function for checking the data in a column against expected results, according to the different functions in the assert function,
+    //// such as when running regression tests.
+    ////  If gen_check_data_in_assert is true, we will generate a file of check data; otherwise we will read the file to check the data.
+    ////  So the key point is how we write the assert callback function to check the data,
+    ///   and when check data is generated, we should check that the result satisfies the semantics of the function.
+    static void check_res_file(string function_name, 
std::vector<std::vector<std::string>>& res) {
+        string filename = "./res_" + function_name + ".csv";
+        if (gen_check_data_in_assert) {
+            std::ofstream res_file(filename);
+            std::cout << "gen check data: " << res.size() << " with file: " << 
filename
+                      << std::endl;
+            if (!res_file.is_open()) {
+                throw std::ios_base::failure("Failed to open file.");
+            }
+
+            for (const auto& row : res) {
+                for (size_t i = 0; i < row.size(); ++i) {
+                    auto cell = row[i];
+                    res_file << cell;
+                    if (i < row.size() - 1) {
+                        res_file << ";"; // Add semicolon between columns
+                    }
+                }
+                res_file << "\n"; // Newline after each row
+            }
+
+            res_file.close();
+        } else {
+            // we read generate file to check result
+            std::cout << "check data: " << res.size() << " with file: " << 
filename << std::endl;
+            std::ifstream file(filename);
+            if (!file) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not 
open the file: {} ",
+                                       filename);
+            }
+
+            std::string line;
+            std::vector<std::vector<std::string>> assert_res;
+            while (std::getline(file, line)) {
+                std::vector<std::string> row;
+                std::stringstream lineStream(line);
+                std::string value;
+                while (std::getline(lineStream, value, ';')) {
+                    row.push_back(value);
+                }
+                assert_res.push_back(row);
+            }
+
+            // we just do check here
+            for (size_t i = 0; i < res.size(); ++i) {
+                for (size_t j = 0; j < res[i].size(); ++j) {
+                    EXPECT_EQ(res[i][j], assert_res[i][j]);
+                }
+            }
+        }
+    }
+
+public:
+    void SetUp() override {
+        col_str = ColumnString::create();
+        col_str->insert_data("aaa", 3);
+        col_str->insert_data("bb", 2);
+        col_str->insert_data("cccc", 4);
+
+        col_int = ColumnInt64::create();
+        col_int->insert_value(1);
+        col_int->insert_value(2);
+        col_int->insert_value(3);
+
+        col_dcm = ColumnDecimal64::create(0, 3);
+        col_dcm->insert_value(1.23);
+        col_dcm->insert_value(4.56);
+        col_dcm->insert_value(7.89);
+
+        col_arr = ColumnArray::create(ColumnInt64::create(), 
ColumnArray::ColumnOffsets::create());
+        Array array1 = {1, 2, 3};
+        Array array2 = {4};
+        col_arr->insert(array1);
+        col_arr->insert(Array());
+        col_arr->insert(array2);
+
+        col_map = ColumnMap::create(ColumnString::create(), 
ColumnInt64::create(),
+                                    ColumnArray::ColumnOffsets::create());
+        Array k1 = {"a", "b", "c"};
+        Array v1 = {1, 2, 3};
+        Array k2 = {"d"};
+        Array v2 = {4};
+        Array a = Array();
+        Map map1, map2, map3;
+        map1.push_back(k1);
+        map1.push_back(v1);
+        col_map->insert(map1);
+        map3.push_back(a);
+        map3.push_back(a);
+        col_map->insert(map3);
+        map2.push_back(k2);
+        map2.push_back(v2);
+        col_map->insert(map2);
+    }
+
+    ColumnString::MutablePtr col_str;
+    ColumnInt64::MutablePtr col_int;
+    ColumnDecimal64::MutablePtr col_dcm;
+    ColumnArray::MutablePtr col_arr;
+    ColumnMap::MutablePtr col_map;
+
+    
////==================================================================================================================
+    // This is a common function to check data in a column against expected results, according to the different functions in the assert function,
+    // which can be used in all column tests,
+    // such as when running regression tests.
+    //  step1. we can set gen_check_data_in_assert to true, and then we will generate a file of check data; otherwise we will read the file to check the data
+    //  step2. we should write the assert callback function to check the data
+    void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char 
col_spliter,
+                    std::set<int> idxes, const std::string& column_data_file,
+                    std::function<void(MutableColumns& load_cols, 
DataTypeSerDeSPtrs serders)>
+                            assert_callback) {
+        ASSERT_EQ(serders.size(), columns.size());
+        // Step 1: Insert data from `column_data_file` into the column and 
check result with `check_data_file`
+        // Load column data and expected data from CSV files
+        std::vector<std::vector<std::string>> res;
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, 
idxes);
+
+        // Step 2: Validate the data in `column` matches `expected_data`
+        assert_callback(columns, serders);
+    }
+    
////==================================================================================================================
+
+    void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, 
DataTypes dataTypes,
+                       char col_spliter, std::set<int> idxes, const 
std::string& column_data_file,
+                       const std::string& check_data_file, MutableColumns& 
check_columns,
+                       DataTypeSerDeSPtrs check_serders, char 
check_col_spliter,
+                       std::set<int> check_idxes,
+                       std::function<void(MutableColumns& load_cols, 
MutableColumns& assert_columns,
+                                          DataTypes dataTypes)>
+                               assert_callback) {
+        // Load column data and expected data from CSV files
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, 
idxes);
+        load_data_from_csv(check_serders, check_columns, check_data_file, 
col_spliter, idxes);
+
+        // Step 2: Validate the data in `column` matches `expected_data`
+        assert_callback(columns, check_columns, dataTypes);
+    }
+
+    static void checkColumn(const IColumn& col1, const IColumn& col2, const 
IDataType& dataType,

Review Comment:
   warning: function 'checkColumn' has cognitive complexity of 75 (threshold 
50) [readability-function-cognitive-complexity]
   ```cpp
       static void checkColumn(const IColumn& col1, const IColumn& col2, const 
IDataType& dataType,
                   ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/columns/common_column_test.h:233:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
           if (WhichDataType(dataType).is_map()) {
           ^
   ```
   **be/test/vec/columns/common_column_test.h:241:** +1, nesting level 
increased to 1
   ```cpp
           } else {
             ^
   ```
   **be/test/vec/columns/common_column_test.h:242:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
               if (WhichDataType(dataType).is_int8()) {
               ^
   ```
   **be/test/vec/columns/common_column_test.h:245:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:248:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int16()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:251:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:254:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:257:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:260:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:263:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:266:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_int128()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:269:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:272:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_float32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:275:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:278:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_float64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:281:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:284:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint8()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:287:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:290:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint16()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:293:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:296:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:299:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:302:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_uint64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:305:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:308:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:311:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:314:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal32()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:317:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:320:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal64()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:323:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:326:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal128v2()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:329:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:332:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal128v3()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:335:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:338:** +1, nesting level 
increased to 2
   ```cpp
               } else if (WhichDataType(dataType).is_decimal256()) {
                      ^
   ```
   **be/test/vec/columns/common_column_test.h:341:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   **be/test/vec/columns/common_column_test.h:344:** +1, nesting level 
increased to 2
   ```cpp
               } else {
                 ^
   ```
   **be/test/vec/columns/common_column_test.h:345:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
                   for (size_t i = 0; i < column_size; ++i) {
                   ^
   ```
   
   </details>
   



##########
be/test/vec/columns/common_column_test.h:
##########
@@ -0,0 +1,1682 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include <fstream>
+
+#include "olap/schema.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_map.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/sort_block.h"
+#include "vec/core/sort_description.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_map.h"
+
+// This test is intended to be a column test template for all columns, which should have unit tests covering the functions defined in column.
+// For example, column_array should test these functions:
+// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized,
+// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size
+//
+namespace doris::vectorized {
+
+static bool gen_check_data_in_assert = false;
+
+class CommonColumnTest : public ::testing::Test {
+protected:
+    // Helper function to load data from CSV: each line is split by spliter, and the fields at the given indexes are loaded into columns
+    void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& 
columns,
+                            const std::string& file_path, const char spliter = 
';',
+                            const std::set<int> idxes = {0}) {
+        ASSERT_EQ(serders.size(), columns.size())
+                << "serder size: " << serders.size() << " column size: " << 
columns.size();
+        ASSERT_EQ(serders.size(), idxes.size())
+                << "serder size: " << serders.size() << " idxes size: " << 
idxes.size();
+        ASSERT_EQ(serders.size(), *idxes.end())
+                << "serder size: " << serders.size() << " idxes size: " << 
*idxes.end();
+        std::ifstream file(file_path);
+        if (!file) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open 
the file: {} ",
+                                   file_path);
+        }
+
+        std::string line;
+        DataTypeSerDe::FormatOptions options;
+        while (std::getline(file, line)) {
+            std::stringstream lineStream(line);
+            std::cout << "whole : " << lineStream.str() << std::endl;
+            std::string value;
+            int l_idx = 0;
+            int c_idx = 0;
+            while (std::getline(lineStream, value, spliter)) {
+                if (idxes.contains(l_idx)) {
+                    Slice string_slice(value.data(), value.size());
+                    std::cout << string_slice << std::endl;
+                    if (auto st = 
serders[c_idx]->deserialize_one_cell_from_json(
+                                *columns[c_idx], string_slice, options);
+                        !st.ok()) {
+                        std::cout << "error in deserialize but continue: " << 
st.to_string()
+                                  << std::endl;
+                    }
+                    ++c_idx;
+                }
+                ++l_idx;
+            }
+        }
+    }
+
+    //// This is a very helpful function for checking the data in a column against expected results, according to the different functions in the assert function,
+    //// such as when running regression tests.
+    ////  If gen_check_data_in_assert is true, we will generate a file of check data; otherwise we will read the file to check the data.
+    ////  So the key point is how we write the assert callback function to check the data,
+    ///   and when check data is generated, we should check that the result satisfies the semantics of the function.
+    static void check_res_file(string function_name, 
std::vector<std::vector<std::string>>& res) {
+        string filename = "./res_" + function_name + ".csv";
+        if (gen_check_data_in_assert) {
+            std::ofstream res_file(filename);
+            std::cout << "gen check data: " << res.size() << " with file: " << 
filename
+                      << std::endl;
+            if (!res_file.is_open()) {
+                throw std::ios_base::failure("Failed to open file.");
+            }
+
+            for (const auto& row : res) {
+                for (size_t i = 0; i < row.size(); ++i) {
+                    auto cell = row[i];
+                    res_file << cell;
+                    if (i < row.size() - 1) {
+                        res_file << ";"; // Add semicolon between columns
+                    }
+                }
+                res_file << "\n"; // Newline after each row
+            }
+
+            res_file.close();
+        } else {
+            // we read generate file to check result
+            std::cout << "check data: " << res.size() << " with file: " << 
filename << std::endl;
+            std::ifstream file(filename);
+            if (!file) {
+                throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not 
open the file: {} ",
+                                       filename);
+            }
+
+            std::string line;
+            std::vector<std::vector<std::string>> assert_res;
+            while (std::getline(file, line)) {
+                std::vector<std::string> row;
+                std::stringstream lineStream(line);
+                std::string value;
+                while (std::getline(lineStream, value, ';')) {
+                    row.push_back(value);
+                }
+                assert_res.push_back(row);
+            }
+
+            // we just do check here
+            for (size_t i = 0; i < res.size(); ++i) {
+                for (size_t j = 0; j < res[i].size(); ++j) {
+                    EXPECT_EQ(res[i][j], assert_res[i][j]);
+                }
+            }
+        }
+    }
+
+public:
+    void SetUp() override {
+        col_str = ColumnString::create();
+        col_str->insert_data("aaa", 3);
+        col_str->insert_data("bb", 2);
+        col_str->insert_data("cccc", 4);
+
+        col_int = ColumnInt64::create();
+        col_int->insert_value(1);
+        col_int->insert_value(2);
+        col_int->insert_value(3);
+
+        col_dcm = ColumnDecimal64::create(0, 3);
+        col_dcm->insert_value(1.23);
+        col_dcm->insert_value(4.56);
+        col_dcm->insert_value(7.89);
+
+        col_arr = ColumnArray::create(ColumnInt64::create(), 
ColumnArray::ColumnOffsets::create());
+        Array array1 = {1, 2, 3};
+        Array array2 = {4};
+        col_arr->insert(array1);
+        col_arr->insert(Array());
+        col_arr->insert(array2);
+
+        col_map = ColumnMap::create(ColumnString::create(), 
ColumnInt64::create(),
+                                    ColumnArray::ColumnOffsets::create());
+        Array k1 = {"a", "b", "c"};
+        Array v1 = {1, 2, 3};
+        Array k2 = {"d"};
+        Array v2 = {4};
+        Array a = Array();
+        Map map1, map2, map3;
+        map1.push_back(k1);
+        map1.push_back(v1);
+        col_map->insert(map1);
+        map3.push_back(a);
+        map3.push_back(a);
+        col_map->insert(map3);
+        map2.push_back(k2);
+        map2.push_back(v2);
+        col_map->insert(map2);
+    }
+
+    ColumnString::MutablePtr col_str;
+    ColumnInt64::MutablePtr col_int;
+    ColumnDecimal64::MutablePtr col_dcm;
+    ColumnArray::MutablePtr col_arr;
+    ColumnMap::MutablePtr col_map;
+
+    
////==================================================================================================================
+    // This is a common function that checks the data in a column against the expected
+    // results; the checks themselves are done by the assert function passed in,
+    // so it can be used in all column tests,
+    // for example to run regression tests:
+    //  step1. set gen_check_data_in_assert to true to generate a file of check data;
+    //         otherwise the existing file is read to check the data
+    //  step2. write an assert callback function that checks the data
+    // Loads `column_data_file` into `columns`, then hands the loaded columns to
+    // `assert_callback` for validation.
+    //  - columns / serders: destination columns and their serdes; both must have the same size.
+    //  - col_spliter / idxes: forwarded unchanged to load_data_from_csv.
+    //  - assert_callback: receives the loaded columns and the serdes and performs the checks.
+    void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter,
+                    std::set<int> idxes, const std::string& column_data_file,
+                    std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders)>
+                            assert_callback) {
+        ASSERT_EQ(serders.size(), columns.size());
+        // Step 1: load the rows of `column_data_file` into `columns`.
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+
+        // Step 2: let the caller-supplied callback validate the loaded columns.
+        assert_callback(columns, serders);
+    }
+    
////==================================================================================================================
+
+    // Loads `column_data_file` into `columns` and `check_data_file` into `check_columns`,
+    // then passes both column sets plus `dataTypes` to `assert_callback`.
+    //  - serders / col_spliter / idxes: used to load `column_data_file`.
+    //  - check_serders / check_col_spliter / check_idxes: used to load `check_data_file`.
+    void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes,
+                       char col_spliter, std::set<int> idxes, const std::string& column_data_file,
+                       const std::string& check_data_file, MutableColumns& check_columns,
+                       DataTypeSerDeSPtrs check_serders, char check_col_spliter,
+                       std::set<int> check_idxes,
+                       std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns,
+                                          DataTypes dataTypes)>
+                               assert_callback) {
+        // Step 1: load the column data and the expected data from their CSV files.
+        load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes);
+        // The expected-data file has its own splitter and column indexes; the data file's
+        // settings must not be reused for it.
+        load_data_from_csv(check_serders, check_columns, check_data_file, check_col_spliter,
+                           check_idxes);
+
+        // Step 2: let the callback validate `columns` against `check_columns`.
+        assert_callback(columns, check_columns, dataTypes);
+    }
+
+    // Helper for checkColumn: views both columns as ColumnType and expects their first
+    // `column_size` elements to be equal.
+    template <typename ColumnType>
+    static void checkColumnElements(const IColumn& col1, const IColumn& col2,
+                                    size_t column_size) {
+        auto lhs = check_and_get_column<ColumnType>(col1);
+        auto rhs = check_and_get_column<ColumnType>(col2);
+        // Fail the test instead of dereferencing a null column when the cast did not succeed.
+        ASSERT_TRUE(lhs != nullptr);
+        ASSERT_TRUE(rhs != nullptr);
+        for (size_t i = 0; i < column_size; ++i) {
+            EXPECT_EQ(lhs->get_element(i), rhs->get_element(i));
+        }
+    }
+
+    // Expects the first `column_size` rows of `col1` and `col2` to be equal.
+    //  - dataType selects the concrete column class used for the element-wise comparison.
+    //  - Map columns are compared recursively: keys against keys, values against values.
+    //  - Types without a dedicated branch are compared through get_data_at().
+    static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType,
+                            size_t column_size) {
+        const WhichDataType which(dataType);
+        if (which.is_map()) {
+            auto map1 = check_and_get_column<ColumnMap>(col1);
+            auto map2 = check_and_get_column<ColumnMap>(col2);
+            ASSERT_TRUE(map1 != nullptr);
+            ASSERT_TRUE(map2 != nullptr);
+            const auto& map_type = static_cast<const DataTypeMap&>(dataType);
+            checkColumn(map1->get_keys(), map2->get_keys(), *map_type.get_key_type(),
+                        map1->get_keys().size());
+            // Compare map1's values with map2's values (not map2 with itself).
+            checkColumn(map1->get_values(), map2->get_values(), *map_type.get_value_type(),
+                        map1->get_values().size());
+            return;
+        }
+
+        // NOTE(review): there is deliberately no generic is_decimal() branch. Placed ahead of
+        // the width-specific checks it would treat every decimal column as ColumnDecimal64;
+        // this assumes is_decimal() covers exactly the specific decimal predicates below.
+        if (which.is_int8()) {
+            checkColumnElements<ColumnInt8>(col1, col2, column_size);
+        } else if (which.is_int16()) {
+            checkColumnElements<ColumnInt16>(col1, col2, column_size);
+        } else if (which.is_int32()) {
+            checkColumnElements<ColumnInt32>(col1, col2, column_size);
+        } else if (which.is_int64()) {
+            checkColumnElements<ColumnInt64>(col1, col2, column_size);
+        } else if (which.is_int128()) {
+            checkColumnElements<ColumnInt128>(col1, col2, column_size);
+        } else if (which.is_float32()) {
+            checkColumnElements<ColumnFloat32>(col1, col2, column_size);
+        } else if (which.is_float64()) {
+            checkColumnElements<ColumnFloat64>(col1, col2, column_size);
+        } else if (which.is_uint8()) {
+            checkColumnElements<ColumnUInt8>(col1, col2, column_size);
+        } else if (which.is_uint16()) {
+            checkColumnElements<ColumnUInt16>(col1, col2, column_size);
+        } else if (which.is_uint32()) {
+            checkColumnElements<ColumnUInt32>(col1, col2, column_size);
+        } else if (which.is_uint64()) {
+            checkColumnElements<ColumnUInt64>(col1, col2, column_size);
+        } else if (which.is_decimal32()) {
+            checkColumnElements<ColumnDecimal32>(col1, col2, column_size);
+        } else if (which.is_decimal64()) {
+            checkColumnElements<ColumnDecimal64>(col1, col2, column_size);
+        } else if (which.is_decimal128v2()) {
+            checkColumnElements<ColumnDecimal128V2>(col1, col2, column_size);
+        } else if (which.is_decimal128v3()) {
+            checkColumnElements<ColumnDecimal128V3>(col1, col2, column_size);
+        } else if (which.is_decimal256()) {
+            checkColumnElements<ColumnDecimal<Decimal256>>(col1, col2, column_size);
+        } else {
+            // No typed accessor for this data type: compare the raw data of each row.
+            for (size_t i = 0; i < column_size; ++i) {
+                EXPECT_EQ(col1.get_data_at(i), col2.get_data_at(i));
+            }
+        }
+    }
+
+    void printColumn(const IColumn& column, const IDataType& dataType) {

Review Comment:
   warning: function 'printColumn' exceeds recommended size/complexity 
thresholds [readability-function-size]
   ```cpp
       void printColumn(const IColumn& column, const IDataType& dataType) {
            ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/test/vec/columns/common_column_test.h:352:** 135 lines including 
whitespace and comments (threshold 80)
   ```cpp
       void printColumn(const IColumn& column, const IDataType& dataType) {
            ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to