This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit a9d9c02a2fefc16c0eccc572626fac8b58a64d70 Author: Pxl <[email protected]> AuthorDate: Wed Jan 12 15:55:58 2022 +0800 [Vectorized][Bug] Fix get wrong result when select random column && fix get wrong has_null_tag (#7728) --- be/src/vec/columns/column.h | 3 +++ be/src/vec/columns/column_nullable.h | 5 +++-- be/src/vec/core/block.cpp | 29 +++++++++++++++++------------ be/src/vec/olap/block_reader.cpp | 23 +++++++++++++++-------- be/src/vec/olap/block_reader.h | 4 ++-- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 88f9a3a..5e41028 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -336,6 +336,9 @@ public: // true iff column has null element virtual bool has_null() const { return false; } + // true iff column has null element [0,size) + virtual bool has_null(size_t size) const { return false; } + /// It's a special kind of column, that contain single value, but is not a ColumnConst. virtual bool is_dummy() const { return false; } diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 9641788..8f47777 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -176,8 +176,9 @@ public: /// Check that size of null map equals to size of nested column. void check_consistency() const; - bool has_null() const override { - size_t size = get_null_map_data().size(); + bool has_null() const override { return has_null(get_null_map_data().size()); } + + bool has_null(size_t size) const override { const UInt8* null_pos = get_null_map_data().data(); const UInt8* null_pos_end = get_null_map_data().data() + size; #ifdef __SSE2__ diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 2aff751..d200a46 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -21,18 +21,18 @@ #include "vec/core/block.h" #include <fmt/format.h> +#include <snappy.h> + #include <iomanip> #include <iterator> #include <memory> -#include <snappy.h> #include "common/status.h" #include "gen_cpp/data.pb.h" #include "runtime/descriptors.h" +#include "runtime/row_batch.h" #include "runtime/tuple.h" #include "runtime/tuple_row.h" -#include "runtime/row_batch.h" - #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" @@ -692,8 +692,10 @@ Status Block::filter_block(Block* block, int filter_column_id, int column_to_kee if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) { ColumnPtr nested_column = nullable_column->get_nested_column_ptr(); - MutableColumnPtr mutable_holder = nested_column->use_count() == 1 ? - nested_column->assume_mutable() : nested_column->clone_resized(nested_column->size()); + MutableColumnPtr mutable_holder = + nested_column->use_count() == 1 + ? nested_column->assume_mutable() + : nested_column->clone_resized(nested_column->size()); ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get()); if (!concrete_column) { @@ -769,8 +771,8 @@ void Block::serialize(RowBatch* output_batch, const RowDescriptor& row_desc) { } } -doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool* pool, - int row, int column_offset, bool padding_char) { +doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool* pool, int row, + int column_offset, bool padding_char) { auto dst = reinterpret_cast<doris::Tuple*>(pool->allocate(desc.byte_size())); for (int i = 0; i < desc.slots().size(); ++i) { @@ -787,8 +789,9 @@ doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool if (!slot_desc->type().is_string_type() && !slot_desc->type().is_date_type()) { memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), data_ref.data, data_ref.size); - } else if (slot_desc->type().is_string_type() && slot_desc->type() != TYPE_OBJECT){ - memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const void*)(&data_ref), sizeof(data_ref)); + } else if (slot_desc->type().is_string_type() && slot_desc->type() != TYPE_OBJECT) { + memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const void*)(&data_ref), + sizeof(data_ref)); // Copy the content of string if (padding_char && slot_desc->type() == TYPE_CHAR) { // serialize the content of string @@ -800,7 +803,8 @@ doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool } else { auto str_ptr = pool->allocate(data_ref.size); memcpy(str_ptr, data_ref.data, data_ref.size); - dst->get_string_slot(slot_desc->tuple_offset())->ptr = reinterpret_cast<char *>(str_ptr); + dst->get_string_slot(slot_desc->tuple_offset())->ptr = + reinterpret_cast<char*>(str_ptr); } } else if (slot_desc->type() == TYPE_OBJECT) { auto bitmap_value = (BitmapValue*)(data_ref.data); @@ -812,7 +816,8 @@ doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool bitmap_value->write(string_slot->ptr); string_slot->len = size; } else { - VecDateTimeValue ts = *reinterpret_cast<const doris::vectorized::VecDateTimeValue*>(data_ref.data); + VecDateTimeValue ts = + *reinterpret_cast<const doris::vectorized::VecDateTimeValue*>(data_ref.data); DateTimeValue dt; ts.convert_vec_dt_to_dt(&dt); memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), &dt, sizeof(DateTimeValue)); @@ -903,7 +908,7 @@ std::unique_ptr<Block> Block::create_same_struct_block(size_t size) const { auto temp_block = std::make_unique<Block>(); for (const auto& d : data) { auto column = d.type->create_column(); - column->insert_many_defaults(size); + column->resize(size); temp_block->insert({std::move(column), d.type, d.name}); } return temp_block; diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp index f7a1388..ec891f9 100644 --- a/be/src/vec/olap/block_reader.cpp +++ b/be/src/vec/olap/block_reader.cpp @@ -71,7 +71,7 @@ OLAPStatus BlockReader::_init_collect_iter(const ReaderParams& read_params, return OLAP_SUCCESS; } -void BlockReader::_init_agg_state() { +void BlockReader::_init_agg_state(const ReaderParams& read_params) { if (_eof) { return; } @@ -84,7 +84,10 @@ void BlockReader::_init_agg_state() { auto& tablet_schema = tablet()->tablet_schema(); for (auto idx : _agg_columns_idx) { - FieldAggregationMethod agg_method = tablet_schema.column(idx).aggregation(); + FieldAggregationMethod agg_method = + tablet_schema + .column(read_params.origin_return_columns->at(_return_columns_loc[idx])) + .aggregation(); std::string agg_name = TabletColumn::get_string_by_aggregation_type(agg_method) + agg_reader_suffix; std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), @@ -157,7 +160,7 @@ OLAPStatus BlockReader::init(const ReaderParams& read_params) { break; case KeysType::AGG_KEYS: _next_block_func = &BlockReader::_agg_key_next_block; - _init_agg_state(); + _init_agg_state(read_params); break; default: DCHECK(false) << "No next row function for type:" << tablet()->keys_type(); @@ -282,11 +285,11 @@ void BlockReader::_append_agg_data(MutableColumns& columns) { void BlockReader::_update_agg_data(MutableColumns& columns) { // copy data to stored block - _copy_agg_data(); + size_t copy_size = _copy_agg_data(); // calculate has_null_tag for (auto idx : _agg_columns_idx) { - _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(); + _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(copy_size); } // calculate aggregate and insert @@ -305,8 +308,10 @@ void BlockReader::_update_agg_data(MutableColumns& columns) { _agg_data_counters.clear(); } -void BlockReader::_copy_agg_data() { - for (int i = 0; i < _stored_row_ref.size(); i++) { +size_t BlockReader::_copy_agg_data() { + size_t copy_size = _stored_row_ref.size(); + + for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; _temp_ref_map[ref.block].emplace_back(ref.row_pos, i); } @@ -315,7 +320,7 @@ void BlockReader::_copy_agg_data() { auto& dst_column = _stored_data_columns[idx]; if (_stored_has_string_tag[idx]) { //string type should replace ordered - for (int i = 0; i < _stored_row_ref.size(); i++) { + for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; dst_column->replace_column_data(*ref.block->get_by_position(idx).column, ref.row_pos, i); @@ -334,6 +339,8 @@ void BlockReader::_copy_agg_data() { it.second.clear(); } _stored_row_ref.clear(); + + return copy_size; } void BlockReader::_update_agg_value(MutableColumns& columns, int begin, int end, bool is_close) { diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h index 02cb570..0fba0be 100644 --- a/be/src/vec/olap/block_reader.h +++ b/be/src/vec/olap/block_reader.h @@ -73,7 +73,7 @@ private: OLAPStatus _init_collect_iter(const ReaderParams& read_params, std::vector<RowsetReaderSharedPtr>* valid_rs_readers); - void _init_agg_state(); + void _init_agg_state(const ReaderParams& read_params); void _insert_data_normal(MutableColumns& columns); @@ -81,7 +81,7 @@ private: void _update_agg_data(MutableColumns& columns); - void _copy_agg_data(); + size_t _copy_agg_data(); void _update_agg_value(MutableColumns& columns, int begin, int end, bool is_close = true); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
