github-actions[bot] commented on code in PR #42269: URL: https://github.com/apache/doris/pull/42269#discussion_r1838091494
########## be/test/vec/columns/column_ip_test.cpp: ########## @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> Review Comment: warning: 'gtest/gtest-message.h' file not found [clang-diagnostic-error] ```cpp #include <gtest/gtest-message.h> ^ ``` ########## be/test/vec/columns/common_column_test.h: ########## @@ -0,0 +1,1717 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <fstream> + +#include "olap/schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" + +// this test is gonna to be a column test template for all column which should make ut test to coverage the function defined in column +// for example column_array should test this function: +// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, +// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size +// +namespace doris::vectorized { + +class CommonColumnTest : public ::testing::Test { +protected: + // Helper function to load data from CSV, with index which splited by spliter and return a vector of ColumnPtr + void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, + const std::string& file_path, const char spliter = ';', + const std::set<int> idxes = {0}, bool gen_check_data = false) { + ASSERT_EQ(serders.size(), columns.size()) + << "serder size: " << serders.size() << " column size: " << columns.size(); + ASSERT_EQ(serders.size(), idxes.size()) + << "serder size: " << serders.size() << " idxes size: " << idxes.size(); + ASSERT_EQ(serders.size(), *idxes.end()) + << "serder size: " << serders.size() << " idxes size: " << *idxes.end(); + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + + std::string line; + DataTypeSerDe::FormatOptions options; + std::vector<std::vector<std::string>> res; + while (std::getline(file, line)) { + std::stringstream lineStream(line); + + std::cout << "whole : " << lineStream.str() << std::endl; + std::string value; + int l_idx = 0; + int c_idx = 0; + std::vector<std::string> data; + while (std::getline(lineStream, value, spliter)) { + if (idxes.contains(l_idx)) { + Slice string_slice(value.data(), value.size()); + std::cout << string_slice << std::endl; + if (auto st = serders[c_idx]->deserialize_one_cell_from_json( + *columns[c_idx], string_slice, options); + !st.ok()) { + data.push_back(""); + std::cout << "error in deserialize but continue: " << st.to_string() + << std::endl; + } else { + data.push_back(value); + } + ++c_idx; + } + ++l_idx; + } + res.push_back(data); + } + if (gen_check_data) { + string filename = "./res.csv"; + std::ofstream res_file(filename); + std::cout << "gen check data: " << res.size() << std::endl; + if (!res_file.is_open()) { + throw std::ios_base::failure("Failed to open file."); + } + + for (const auto& row : res) { + for (size_t i = 0; i < row.size(); ++i) { + auto cell = row[i]; + res_file << cell; + if (i < row.size() - 1) { + res_file << ";"; // Add semicolon between columns + } + } + res_file << "\n"; // Newline after each row + } + + res_file.close(); + } + } + + // Helper function to load data from CSV + std::vector<std::vector<std::string>> load_data_from_csv(const std::string& file_path, + const char spliter = ';') { + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + std::vector<std::vector<std::string>> res; + std::string line; + + while (std::getline(file, line)) { + std::vector<std::string> data; + std::stringstream ss(line); + std::string value; + while (std::getline(ss, value, spliter)) { + data.push_back(value); + } + res.push_back(data); + } + + return res; + } + +private: +public: + void SetUp() override { + col_str = ColumnString::create(); + col_str->insert_data("aaa", 3); + col_str->insert_data("bb", 2); + col_str->insert_data("cccc", 4); + + col_int = ColumnInt64::create(); + col_int->insert_value(1); + col_int->insert_value(2); + col_int->insert_value(3); + + col_dcm = ColumnDecimal64::create(0, 3); + col_dcm->insert_value(1.23); + col_dcm->insert_value(4.56); + col_dcm->insert_value(7.89); + + col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create()); + Array array1 = {1, 2, 3}; + Array array2 = {4}; + col_arr->insert(array1); + col_arr->insert(Array()); + col_arr->insert(array2); + + col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(), + ColumnArray::ColumnOffsets::create()); + Array k1 = {"a", "b", "c"}; + Array v1 = {1, 2, 3}; + Array k2 = {"d"}; + Array v2 = {4}; + Array a = Array(); + Map map1, map2, map3; + map1.push_back(k1); + map1.push_back(v1); + col_map->insert(map1); + map3.push_back(a); + map3.push_back(a); + col_map->insert(map3); + map2.push_back(k2); + map2.push_back(v2); + col_map->insert(map2); + } + + ColumnString::MutablePtr col_str; + ColumnInt64::MutablePtr col_int; + ColumnDecimal64::MutablePtr col_dcm; + ColumnArray::MutablePtr col_arr; + ColumnMap::MutablePtr col_map; + + // Tool function to check data in column against expected results according different function in assert function + void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter, + std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, + std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders, + std::vector<std::vector<string>>& assert_res)> + assert_callback) { + ASSERT_EQ(serders.size(), columns.size()); + MutableColumns assert_columns(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) { + assert_columns[i] = columns[i]->clone_empty(); + } + // Step 1: Insert data from `column_data_file` into the column and check result with `check_data_file` + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes, false); + auto res = load_data_from_csv(check_data_file, col_spliter); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, serders, res); + } + + void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes, + char col_spliter, std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, MutableColumns& check_columns, + DataTypeSerDeSPtrs check_serders, char check_col_spliter, + std::set<int> check_idxes, + std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns, + DataTypes dataTypes)> + assert_callback) { + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes); + load_data_from_csv(check_serders, check_columns, check_data_file, col_spliter, idxes); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, check_columns, dataTypes); + } + + static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, + size_t column_size) { + if (WhichDataType(dataType).is_map()) { + auto map1 = check_and_get_column<ColumnMap>(col1); + auto map2 = check_and_get_column<ColumnMap>(col2); + const DataTypeMap& rhs_map = static_cast<const DataTypeMap&>(dataType); + checkColumn(map1->get_keys(), map2->get_keys(), *rhs_map.get_key_type(), + map1->get_keys().size()); + checkColumn(map2->get_values(), map2->get_values(), *rhs_map.get_value_type(), + map1->get_values().size()); + } else { + if (WhichDataType(dataType).is_int8()) { + auto c1 = check_and_get_column<ColumnInt8>(col1); + auto c2 = check_and_get_column<ColumnInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int16()) { + auto c1 = check_and_get_column<ColumnInt16>(col1); + auto c2 = check_and_get_column<ColumnInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int32()) { + auto c1 = check_and_get_column<ColumnInt32>(col1); + auto c2 = check_and_get_column<ColumnInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int64()) { + auto c1 = check_and_get_column<ColumnInt64>(col1); + auto c2 = check_and_get_column<ColumnInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int128()) { + auto c1 = check_and_get_column<ColumnInt128>(col1); + auto c2 = check_and_get_column<ColumnInt128>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float32()) { + auto c1 = check_and_get_column<ColumnFloat32>(col1); + auto c2 = check_and_get_column<ColumnFloat32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float64()) { + auto c1 = check_and_get_column<ColumnFloat64>(col1); + auto c2 = check_and_get_column<ColumnFloat64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint8()) { + auto c1 = check_and_get_column<ColumnUInt8>(col1); + auto c2 = check_and_get_column<ColumnUInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint16()) { + auto c1 = check_and_get_column<ColumnUInt16>(col1); + auto c2 = check_and_get_column<ColumnUInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint32()) { + auto c1 = check_and_get_column<ColumnUInt32>(col1); + auto c2 = check_and_get_column<ColumnUInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint64()) { + auto c1 = check_and_get_column<ColumnUInt64>(col1); + auto c2 = check_and_get_column<ColumnUInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal32()) { + auto c1 = check_and_get_column<ColumnDecimal32>(col1); + auto c2 = check_and_get_column<ColumnDecimal32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal64()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v2()) { + auto c1 = check_and_get_column<ColumnDecimal128V2>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V2>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v3()) { + auto c1 = check_and_get_column<ColumnDecimal128V3>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V3>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal256()) { + auto c1 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + auto c2 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else { + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(col1.get_data_at(i), col2.get_data_at(i)); + } + } + } + } + + void printColumn(const IColumn& column, const IDataType& dataType) { Review Comment: warning: function 'printColumn' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp void printColumn(const IColumn& column, const IDataType& dataType) { ^ ``` <details> <summary>Additional context</summary> **be/test/vec/columns/common_column_test.h:346:** 135 lines including whitespace and comments (threshold 80) ```cpp void printColumn(const IColumn& column, const IDataType& dataType) { ^ ``` </details> ########## be/test/vec/columns/common_column_test.h: ########## @@ -0,0 +1,1717 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <fstream> + +#include "olap/schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" + +// this test is gonna to be a column test template for all column which should make ut test to coverage the function defined in column +// for example column_array should test this function: +// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, +// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size +// +namespace doris::vectorized { + +class CommonColumnTest : public ::testing::Test { +protected: + // Helper function to load data from CSV, with index which splited by spliter and return a vector of ColumnPtr + void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, + const std::string& file_path, const char spliter = ';', + const std::set<int> idxes = {0}, bool gen_check_data = false) { + ASSERT_EQ(serders.size(), columns.size()) + << "serder size: " << serders.size() << " column size: " << columns.size(); + ASSERT_EQ(serders.size(), idxes.size()) + << "serder size: " << serders.size() << " idxes size: " << idxes.size(); + ASSERT_EQ(serders.size(), *idxes.end()) + << "serder size: " << serders.size() << " idxes size: " << *idxes.end(); + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + + std::string line; + DataTypeSerDe::FormatOptions options; + std::vector<std::vector<std::string>> res; + while (std::getline(file, line)) { + std::stringstream lineStream(line); + + std::cout << "whole : " << lineStream.str() << std::endl; + std::string value; + int l_idx = 0; + int c_idx = 0; + std::vector<std::string> data; + while (std::getline(lineStream, value, spliter)) { + if (idxes.contains(l_idx)) { + Slice string_slice(value.data(), value.size()); + std::cout << string_slice << std::endl; + if (auto st = serders[c_idx]->deserialize_one_cell_from_json( + *columns[c_idx], string_slice, options); + !st.ok()) { + data.push_back(""); + std::cout << "error in deserialize but continue: " << st.to_string() + << std::endl; + } else { + data.push_back(value); + } + ++c_idx; + } + ++l_idx; + } + res.push_back(data); + } + if (gen_check_data) { + string filename = "./res.csv"; + std::ofstream res_file(filename); + std::cout << "gen check data: " << res.size() << std::endl; + if (!res_file.is_open()) { + throw std::ios_base::failure("Failed to open file."); + } + + for (const auto& row : res) { + for (size_t i = 0; i < row.size(); ++i) { + auto cell = row[i]; + res_file << cell; + if (i < row.size() - 1) { + res_file << ";"; // Add semicolon between columns + } + } + res_file << "\n"; // Newline after each row + } + + res_file.close(); + } + } + + // Helper function to load data from CSV + std::vector<std::vector<std::string>> load_data_from_csv(const std::string& file_path, + const char spliter = ';') { + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + std::vector<std::vector<std::string>> res; + std::string line; + + while (std::getline(file, line)) { + std::vector<std::string> data; + std::stringstream ss(line); + std::string value; + while (std::getline(ss, value, spliter)) { + data.push_back(value); + } + res.push_back(data); + } + + return res; + } + +private: +public: + void SetUp() override { + col_str = ColumnString::create(); + col_str->insert_data("aaa", 3); + col_str->insert_data("bb", 2); + col_str->insert_data("cccc", 4); + + col_int = ColumnInt64::create(); + col_int->insert_value(1); + col_int->insert_value(2); + col_int->insert_value(3); + + col_dcm = ColumnDecimal64::create(0, 3); + col_dcm->insert_value(1.23); + col_dcm->insert_value(4.56); + col_dcm->insert_value(7.89); + + col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create()); + Array array1 = {1, 2, 3}; + Array array2 = {4}; + col_arr->insert(array1); + col_arr->insert(Array()); + col_arr->insert(array2); + + col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(), + ColumnArray::ColumnOffsets::create()); + Array k1 = {"a", "b", "c"}; + Array v1 = {1, 2, 3}; + Array k2 = {"d"}; + Array v2 = {4}; + Array a = Array(); + Map map1, map2, map3; + map1.push_back(k1); + map1.push_back(v1); + col_map->insert(map1); + map3.push_back(a); + map3.push_back(a); + col_map->insert(map3); + map2.push_back(k2); + map2.push_back(v2); + col_map->insert(map2); + } + + ColumnString::MutablePtr col_str; + ColumnInt64::MutablePtr col_int; + ColumnDecimal64::MutablePtr col_dcm; + ColumnArray::MutablePtr col_arr; + ColumnMap::MutablePtr col_map; + + // Tool function to check data in column against expected results according different function in assert function + void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter, + std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, + std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders, + std::vector<std::vector<string>>& assert_res)> + assert_callback) { + ASSERT_EQ(serders.size(), columns.size()); + MutableColumns assert_columns(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) { + assert_columns[i] = columns[i]->clone_empty(); + } + // Step 1: Insert data from `column_data_file` into the column and check result with `check_data_file` + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes, false); + auto res = load_data_from_csv(check_data_file, col_spliter); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, serders, res); + } + + void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes, + char col_spliter, std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, MutableColumns& check_columns, + DataTypeSerDeSPtrs check_serders, char check_col_spliter, + std::set<int> check_idxes, + std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns, + DataTypes dataTypes)> + assert_callback) { + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes); + load_data_from_csv(check_serders, check_columns, check_data_file, col_spliter, idxes); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, check_columns, dataTypes); + } + + static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, Review Comment: warning: function 'checkColumn' exceeds recommended size/complexity thresholds [readability-function-size] ```cpp static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, ^ ``` <details> <summary>Additional context</summary> **be/test/vec/columns/common_column_test.h:225:** 118 lines including whitespace and comments (threshold 80) ```cpp static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, ^ ``` </details> ########## be/test/vec/columns/common_column_test.h: ########## @@ -0,0 +1,1717 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <fstream> + +#include "olap/schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" + +// this test is gonna to be a column test template for all column which should make ut test to coverage the function defined in column +// for example column_array should test this function: +// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, +// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size +// +namespace doris::vectorized { + +class CommonColumnTest : public ::testing::Test { +protected: + // Helper function to load data from CSV, with index which splited by spliter and return a vector of ColumnPtr + void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, + const std::string& file_path, const char spliter = ';', + const std::set<int> idxes = {0}, bool gen_check_data = false) { + ASSERT_EQ(serders.size(), columns.size()) + << "serder size: " << serders.size() << " column size: " << columns.size(); + ASSERT_EQ(serders.size(), idxes.size()) + << "serder size: " << serders.size() << " idxes size: " << idxes.size(); + ASSERT_EQ(serders.size(), *idxes.end()) + << "serder size: " << serders.size() << " idxes size: " << *idxes.end(); + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + + std::string line; + DataTypeSerDe::FormatOptions options; + std::vector<std::vector<std::string>> res; + while (std::getline(file, line)) { + std::stringstream lineStream(line); + + std::cout << "whole : " << lineStream.str() << std::endl; + std::string value; + int l_idx = 0; + int c_idx = 0; + std::vector<std::string> data; + while (std::getline(lineStream, value, spliter)) { + if (idxes.contains(l_idx)) { + Slice string_slice(value.data(), value.size()); + std::cout << string_slice << std::endl; + if (auto st = serders[c_idx]->deserialize_one_cell_from_json( + *columns[c_idx], string_slice, options); + !st.ok()) { + data.push_back(""); + std::cout << "error in deserialize but continue: " << st.to_string() + << std::endl; + } else { + data.push_back(value); + } + ++c_idx; + } + ++l_idx; + } + res.push_back(data); + } + if (gen_check_data) { + string filename = "./res.csv"; + std::ofstream res_file(filename); + std::cout << "gen check data: " << res.size() << std::endl; + if (!res_file.is_open()) { + throw std::ios_base::failure("Failed to open file."); + } + + for (const auto& row : res) { + for (size_t i = 0; i < row.size(); ++i) { + auto cell = row[i]; + res_file << cell; + if (i < row.size() - 1) { + res_file << ";"; // Add semicolon between columns + } + } + res_file << "\n"; // Newline after each row + } + + res_file.close(); + } + } + + // Helper function to load data from CSV + std::vector<std::vector<std::string>> load_data_from_csv(const std::string& file_path, + const char spliter = ';') { + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + std::vector<std::vector<std::string>> res; + std::string line; + + while (std::getline(file, line)) { + std::vector<std::string> data; + std::stringstream ss(line); + std::string value; + while (std::getline(ss, value, spliter)) { + data.push_back(value); + } + res.push_back(data); + } + + return res; + } + +private: +public: + void SetUp() override { + col_str = ColumnString::create(); + col_str->insert_data("aaa", 3); + col_str->insert_data("bb", 2); + col_str->insert_data("cccc", 4); + + col_int = ColumnInt64::create(); + col_int->insert_value(1); + col_int->insert_value(2); + col_int->insert_value(3); + + col_dcm = ColumnDecimal64::create(0, 3); + col_dcm->insert_value(1.23); + col_dcm->insert_value(4.56); + col_dcm->insert_value(7.89); + + col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create()); + Array array1 = {1, 2, 3}; + Array array2 = {4}; + col_arr->insert(array1); + col_arr->insert(Array()); + col_arr->insert(array2); + + col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(), + ColumnArray::ColumnOffsets::create()); + Array k1 = {"a", "b", "c"}; + Array v1 = {1, 2, 3}; + Array k2 = {"d"}; + Array v2 = {4}; + Array a = Array(); + Map map1, map2, map3; + map1.push_back(k1); + map1.push_back(v1); + col_map->insert(map1); + map3.push_back(a); + map3.push_back(a); + col_map->insert(map3); + map2.push_back(k2); + map2.push_back(v2); + col_map->insert(map2); + } + + ColumnString::MutablePtr col_str; + ColumnInt64::MutablePtr col_int; + ColumnDecimal64::MutablePtr col_dcm; + ColumnArray::MutablePtr col_arr; + ColumnMap::MutablePtr col_map; + + // Tool function to check data in column against expected results according different function in assert function + void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter, + std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, + std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders, + std::vector<std::vector<string>>& assert_res)> + assert_callback) { + ASSERT_EQ(serders.size(), columns.size()); + MutableColumns assert_columns(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) { + assert_columns[i] = columns[i]->clone_empty(); + } + // Step 1: Insert data from `column_data_file` into the column and check result with `check_data_file` + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes, false); + auto res = load_data_from_csv(check_data_file, col_spliter); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, serders, res); + } + + void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes, + char col_spliter, std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, MutableColumns& check_columns, + DataTypeSerDeSPtrs check_serders, char check_col_spliter, + std::set<int> check_idxes, + std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns, + DataTypes dataTypes)> + assert_callback) { + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes); + load_data_from_csv(check_serders, check_columns, check_data_file, col_spliter, idxes); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, check_columns, dataTypes); + } + + static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, Review Comment: warning: function 'checkColumn' has cognitive complexity of 75 (threshold 50) [readability-function-cognitive-complexity] ```cpp static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, ^ ``` <details> <summary>Additional context</summary> **be/test/vec/columns/common_column_test.h:227:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (WhichDataType(dataType).is_map()) { ^ ``` **be/test/vec/columns/common_column_test.h:235:** +1, nesting level increased to 1 ```cpp } else { ^ ``` **be/test/vec/columns/common_column_test.h:236:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (WhichDataType(dataType).is_int8()) { ^ ``` **be/test/vec/columns/common_column_test.h:239:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:242:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int16()) { ^ ``` **be/test/vec/columns/common_column_test.h:245:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:248:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int32()) { ^ ``` **be/test/vec/columns/common_column_test.h:251:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:254:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int64()) { ^ ``` **be/test/vec/columns/common_column_test.h:257:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:260:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int128()) { ^ ``` **be/test/vec/columns/common_column_test.h:263:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:266:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_float32()) { ^ ``` **be/test/vec/columns/common_column_test.h:269:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:272:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_float64()) { ^ ``` **be/test/vec/columns/common_column_test.h:275:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:278:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint8()) { ^ ``` **be/test/vec/columns/common_column_test.h:281:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:284:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint16()) { ^ ``` **be/test/vec/columns/common_column_test.h:287:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:290:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint32()) { ^ ``` **be/test/vec/columns/common_column_test.h:293:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:296:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint64()) { ^ ``` **be/test/vec/columns/common_column_test.h:299:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:302:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal()) { ^ ``` **be/test/vec/columns/common_column_test.h:305:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:308:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal32()) { ^ ``` **be/test/vec/columns/common_column_test.h:311:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:314:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal64()) { ^ ``` **be/test/vec/columns/common_column_test.h:317:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:320:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal128v2()) { ^ ``` **be/test/vec/columns/common_column_test.h:323:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:326:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal128v3()) { ^ ``` **be/test/vec/columns/common_column_test.h:329:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:332:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal256()) { ^ ``` **be/test/vec/columns/common_column_test.h:335:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:338:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/test/vec/columns/common_column_test.h:339:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` </details> ########## be/test/vec/columns/common_column_test.h: ########## @@ -0,0 +1,1717 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> + +#include <fstream> + +#include "olap/schema.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/columns_number.h" +#include "vec/core/field.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_map.h" + +// this test is gonna to be a column test template for all column which should make ut test to coverage the function defined in column +// for example column_array should test this function: +// size, reserve, resize, empty, byte_size, allocated_bytes, clone_resized, +// get_shrinked_column, filter, filter_by_selector, serialize_vec, deserialize_vec, get_max_row_byte_size +// +namespace doris::vectorized { + +class CommonColumnTest : public ::testing::Test { +protected: + // Helper function to load data from CSV, with index which splited by spliter and return a vector of ColumnPtr + void load_data_from_csv(const DataTypeSerDeSPtrs serders, MutableColumns& columns, + const std::string& file_path, const char spliter = ';', + const std::set<int> idxes = {0}, bool gen_check_data = false) { + ASSERT_EQ(serders.size(), columns.size()) + << "serder size: " << serders.size() << " column size: " << columns.size(); + ASSERT_EQ(serders.size(), idxes.size()) + << "serder size: " << serders.size() << " idxes size: " << idxes.size(); + ASSERT_EQ(serders.size(), *idxes.end()) + << "serder size: " << serders.size() << " idxes size: " << *idxes.end(); + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + + std::string line; + DataTypeSerDe::FormatOptions options; + std::vector<std::vector<std::string>> res; + while (std::getline(file, line)) { + std::stringstream lineStream(line); + + std::cout << "whole : " << lineStream.str() << std::endl; + std::string value; + int l_idx = 0; + int c_idx = 0; + std::vector<std::string> data; + while (std::getline(lineStream, value, spliter)) { + if (idxes.contains(l_idx)) { + Slice string_slice(value.data(), value.size()); + std::cout << string_slice << std::endl; + if (auto st = serders[c_idx]->deserialize_one_cell_from_json( + *columns[c_idx], string_slice, options); + !st.ok()) { + data.push_back(""); + std::cout << "error in deserialize but continue: " << st.to_string() + << std::endl; + } else { + data.push_back(value); + } + ++c_idx; + } + ++l_idx; + } + res.push_back(data); + } + if (gen_check_data) { + string filename = "./res.csv"; + std::ofstream res_file(filename); + std::cout << "gen check data: " << res.size() << std::endl; + if (!res_file.is_open()) { + throw std::ios_base::failure("Failed to open file."); + } + + for (const auto& row : res) { + for (size_t i = 0; i < row.size(); ++i) { + auto cell = row[i]; + res_file << cell; + if (i < row.size() - 1) { + res_file << ";"; // Add semicolon between columns + } + } + res_file << "\n"; // Newline after each row + } + + res_file.close(); + } + } + + // Helper function to load data from CSV + std::vector<std::vector<std::string>> load_data_from_csv(const std::string& file_path, + const char spliter = ';') { + std::ifstream file(file_path); + if (!file) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "can not open the file: {} ", + file_path); + } + std::vector<std::vector<std::string>> res; + std::string line; + + while (std::getline(file, line)) { + std::vector<std::string> data; + std::stringstream ss(line); + std::string value; + while (std::getline(ss, value, spliter)) { + data.push_back(value); + } + res.push_back(data); + } + + return res; + } + +private: +public: + void SetUp() override { + col_str = ColumnString::create(); + col_str->insert_data("aaa", 3); + col_str->insert_data("bb", 2); + col_str->insert_data("cccc", 4); + + col_int = ColumnInt64::create(); + col_int->insert_value(1); + col_int->insert_value(2); + col_int->insert_value(3); + + col_dcm = ColumnDecimal64::create(0, 3); + col_dcm->insert_value(1.23); + col_dcm->insert_value(4.56); + col_dcm->insert_value(7.89); + + col_arr = ColumnArray::create(ColumnInt64::create(), ColumnArray::ColumnOffsets::create()); + Array array1 = {1, 2, 3}; + Array array2 = {4}; + col_arr->insert(array1); + col_arr->insert(Array()); + col_arr->insert(array2); + + col_map = ColumnMap::create(ColumnString::create(), ColumnInt64::create(), + ColumnArray::ColumnOffsets::create()); + Array k1 = {"a", "b", "c"}; + Array v1 = {1, 2, 3}; + Array k2 = {"d"}; + Array v2 = {4}; + Array a = Array(); + Map map1, map2, map3; + map1.push_back(k1); + map1.push_back(v1); + col_map->insert(map1); + map3.push_back(a); + map3.push_back(a); + col_map->insert(map3); + map2.push_back(k2); + map2.push_back(v2); + col_map->insert(map2); + } + + ColumnString::MutablePtr col_str; + ColumnInt64::MutablePtr col_int; + ColumnDecimal64::MutablePtr col_dcm; + ColumnArray::MutablePtr col_arr; + ColumnMap::MutablePtr col_map; + + // Tool function to check data in column against expected results according different function in assert function + void check_data(MutableColumns& columns, DataTypeSerDeSPtrs serders, char col_spliter, + std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, + std::function<void(MutableColumns& load_cols, DataTypeSerDeSPtrs serders, + std::vector<std::vector<string>>& assert_res)> + assert_callback) { + ASSERT_EQ(serders.size(), columns.size()); + MutableColumns assert_columns(columns.size()); + for (size_t i = 0; i < columns.size(); ++i) { + assert_columns[i] = columns[i]->clone_empty(); + } + // Step 1: Insert data from `column_data_file` into the column and check result with `check_data_file` + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes, false); + auto res = load_data_from_csv(check_data_file, col_spliter); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, serders, res); + } + + void check_columns(MutableColumns& columns, DataTypeSerDeSPtrs serders, DataTypes dataTypes, + char col_spliter, std::set<int> idxes, const std::string& column_data_file, + const std::string& check_data_file, MutableColumns& check_columns, + DataTypeSerDeSPtrs check_serders, char check_col_spliter, + std::set<int> check_idxes, + std::function<void(MutableColumns& load_cols, MutableColumns& assert_columns, + DataTypes dataTypes)> + assert_callback) { + // Load column data and expected data from CSV files + load_data_from_csv(serders, columns, column_data_file, col_spliter, idxes); + load_data_from_csv(check_serders, check_columns, check_data_file, col_spliter, idxes); + + // Step 2: Validate the data in `column` matches `expected_data` + assert_callback(columns, check_columns, dataTypes); + } + + static void checkColumn(const IColumn& col1, const IColumn& col2, const IDataType& dataType, + size_t column_size) { + if (WhichDataType(dataType).is_map()) { + auto map1 = check_and_get_column<ColumnMap>(col1); + auto map2 = check_and_get_column<ColumnMap>(col2); + const DataTypeMap& rhs_map = static_cast<const DataTypeMap&>(dataType); + checkColumn(map1->get_keys(), map2->get_keys(), *rhs_map.get_key_type(), + map1->get_keys().size()); + checkColumn(map2->get_values(), map2->get_values(), *rhs_map.get_value_type(), + map1->get_values().size()); + } else { + if (WhichDataType(dataType).is_int8()) { + auto c1 = check_and_get_column<ColumnInt8>(col1); + auto c2 = check_and_get_column<ColumnInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int16()) { + auto c1 = check_and_get_column<ColumnInt16>(col1); + auto c2 = check_and_get_column<ColumnInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int32()) { + auto c1 = check_and_get_column<ColumnInt32>(col1); + auto c2 = check_and_get_column<ColumnInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int64()) { + auto c1 = check_and_get_column<ColumnInt64>(col1); + auto c2 = check_and_get_column<ColumnInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_int128()) { + auto c1 = check_and_get_column<ColumnInt128>(col1); + auto c2 = check_and_get_column<ColumnInt128>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float32()) { + auto c1 = check_and_get_column<ColumnFloat32>(col1); + auto c2 = check_and_get_column<ColumnFloat32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_float64()) { + auto c1 = check_and_get_column<ColumnFloat64>(col1); + auto c2 = check_and_get_column<ColumnFloat64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint8()) { + auto c1 = check_and_get_column<ColumnUInt8>(col1); + auto c2 = check_and_get_column<ColumnUInt8>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint16()) { + auto c1 = check_and_get_column<ColumnUInt16>(col1); + auto c2 = check_and_get_column<ColumnUInt16>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint32()) { + auto c1 = check_and_get_column<ColumnUInt32>(col1); + auto c2 = check_and_get_column<ColumnUInt32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_uint64()) { + auto c1 = check_and_get_column<ColumnUInt64>(col1); + auto c2 = check_and_get_column<ColumnUInt64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal32()) { + auto c1 = check_and_get_column<ColumnDecimal32>(col1); + auto c2 = check_and_get_column<ColumnDecimal32>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal64()) { + auto c1 = check_and_get_column<ColumnDecimal64>(col1); + auto c2 = check_and_get_column<ColumnDecimal64>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v2()) { + auto c1 = check_and_get_column<ColumnDecimal128V2>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V2>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal128v3()) { + auto c1 = check_and_get_column<ColumnDecimal128V3>(col1); + auto c2 = check_and_get_column<ColumnDecimal128V3>(col2); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else if (WhichDataType(dataType).is_decimal256()) { + auto c1 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + auto c2 = check_and_get_column<ColumnDecimal<Decimal256>>(col1); + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(c1->get_element(i), c2->get_element(i)); + } + } else { + for (size_t i = 0; i < column_size; ++i) { + EXPECT_EQ(col1.get_data_at(i), col2.get_data_at(i)); + } + } + } + } + + void printColumn(const IColumn& column, const IDataType& dataType) { Review Comment: warning: function 'printColumn' has cognitive complexity of 96 (threshold 50) [readability-function-cognitive-complexity] ```cpp void printColumn(const IColumn& column, const IDataType& dataType) { ^ ``` <details> <summary>Additional context</summary> **be/test/vec/columns/common_column_test.h:348:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (WhichDataType(dataType).is_map()) { ^ ``` **be/test/vec/columns/common_column_test.h:354:** +1, nesting level increased to 1 ```cpp } else if (WhichDataType(dataType).is_array()) { ^ ``` **be/test/vec/columns/common_column_test.h:359:** +1, nesting level increased to 1 ```cpp } else { ^ ``` **be/test/vec/columns/common_column_test.h:362:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (WhichDataType(dataType).is_int8()) { ^ ``` **be/test/vec/columns/common_column_test.h:364:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:367:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int16()) { ^ ``` **be/test/vec/columns/common_column_test.h:369:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:372:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int32()) { ^ ``` **be/test/vec/columns/common_column_test.h:374:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:377:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int64()) { ^ ``` **be/test/vec/columns/common_column_test.h:379:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:382:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_int128()) { ^ ``` **be/test/vec/columns/common_column_test.h:384:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:387:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_float32()) { ^ ``` **be/test/vec/columns/common_column_test.h:389:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:392:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_float64()) { ^ ``` **be/test/vec/columns/common_column_test.h:394:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:397:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint8()) { ^ ``` **be/test/vec/columns/common_column_test.h:399:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:402:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint16()) { ^ ``` **be/test/vec/columns/common_column_test.h:404:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:407:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint32()) { ^ ``` **be/test/vec/columns/common_column_test.h:409:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:412:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint64()) { ^ ``` **be/test/vec/columns/common_column_test.h:414:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:417:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_uint128()) { ^ ``` **be/test/vec/columns/common_column_test.h:419:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:422:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal()) { ^ ``` **be/test/vec/columns/common_column_test.h:424:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:427:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal32()) { ^ ``` **be/test/vec/columns/common_column_test.h:429:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:432:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal64()) { ^ ``` **be/test/vec/columns/common_column_test.h:434:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:437:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal128v2()) { ^ ``` **be/test/vec/columns/common_column_test.h:439:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:442:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal128v3()) { ^ ``` **be/test/vec/columns/common_column_test.h:444:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:447:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_decimal256()) { ^ ``` **be/test/vec/columns/common_column_test.h:449:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:452:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_date()) { ^ ``` **be/test/vec/columns/common_column_test.h:454:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:457:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_date_time()) { ^ ``` **be/test/vec/columns/common_column_test.h:459:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:462:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_date_v2()) { ^ ``` **be/test/vec/columns/common_column_test.h:464:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:467:** +1, nesting level increased to 2 ```cpp } else if (WhichDataType(dataType).is_date_time_v2()) { ^ ``` **be/test/vec/columns/common_column_test.h:469:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` **be/test/vec/columns/common_column_test.h:472:** +1, nesting level increased to 2 ```cpp } else { ^ ``` **be/test/vec/columns/common_column_test.h:475:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i < column_size; ++i) { ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
