github-actions[bot] commented on code in PR #17372:
URL: https://github.com/apache/doris/pull/17372#discussion_r1150213473
##########
be/src/exprs/function_filter.h:
##########
@@ -36,8 +35,8 @@
std::string _col_name;
// these pointer's life time controlled by scan node
doris::FunctionContext* _fn_ctx;
- doris::StringVal
- _string_param; // only one param from conjunct, because now only
support like predicate
+ // only one param from conjunct, because now only support like predicate
+ doris::StringRef _string_param;
Review Comment:
warning: field has incomplete type 'doris::StringRef'
[clang-diagnostic-error]
```cpp
doris::StringRef _string_param;
^
```
**be/src/udf/udf.h:35:** forward declaration of 'doris::StringRef'
```cpp
struct StringRef;
^
```
##########
be/src/exprs/function_filter.h:
##########
@@ -19,14 +19,13 @@
#include <memory>
#include "udf/udf.h"
-#include "udf/udf_internal.h"
namespace doris {
class FunctionFilter {
public:
FunctionFilter(bool opposite, const std::string& col_name,
doris::FunctionContext* fn_ctx,
- doris::StringVal string_param)
+ doris::StringRef string_param)
Review Comment:
warning: variable has incomplete type 'doris::StringRef'
[clang-diagnostic-error]
```cpp
doris::StringRef string_param)
^
```
**be/src/udf/udf.h:35:** forward declaration of 'doris::StringRef'
```cpp
struct StringRef;
^
```
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1681,6 +1821,106 @@
return Status::OK();
}
+Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx,
uint16_t& selected_size,
+ vectorized::Block* block) {
+ SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns);
+ DCHECK(_remaining_vconjunct_root != nullptr);
+ DCHECK(block->rows() != 0);
+ size_t prev_columns = block->columns();
+ Defer defer {[&]() { vectorized::Block::erase_useless_column(block,
prev_columns); }};
+
+ int result_column_id = -1;
+ RETURN_IF_ERROR(_common_vexpr_ctxs_pushdown->execute(block,
&result_column_id));
+ vectorized::ColumnPtr filter_column =
block->get_by_position(result_column_id).column;
+ if (auto* nullable_column =
+
vectorized::check_and_get_column<vectorized::ColumnNullable>(*filter_column)) {
+ vectorized::ColumnPtr nested_column =
nullable_column->get_nested_column_ptr();
+
+ vectorized::MutableColumnPtr mutable_holder =
+ nested_column->use_count() == 1
+ ? nested_column->assume_mutable()
+ : nested_column->clone_resized(nested_column->size());
+
+ vectorized::ColumnUInt8* concrete_column =
+ typeid_cast<vectorized::ColumnUInt8*>(mutable_holder.get());
+ if (!concrete_column) {
+ return Status::InvalidArgument(
+ "Illegal type {} of column for filter. Must be UInt8 or
Nullable(UInt8).",
+ filter_column->get_name());
+ }
+ auto* __restrict null_map =
nullable_column->get_null_map_data().data();
+ vectorized::IColumn::Filter& filter = concrete_column->get_data();
+ auto* __restrict filter_data = filter.data();
+
+ const size_t size = filter.size();
+ for (size_t i = 0; i < size; ++i) {
+ filter_data[i] &= !null_map[i];
+ }
+
+ selected_size = _evaluate_common_expr_filter(sel_rowid_idx,
selected_size, filter);
+ vectorized::Block::filter_block_internal(block, _columns_to_filter,
filter);
+ } else if (auto* const_column =
+
vectorized::check_and_get_column<vectorized::ColumnConst>(*filter_column)) {
+ bool ret = const_column->get_bool(0);
+ if (!ret) {
+ for (auto& col : _columns_to_filter) {
+
std::move(*block->get_by_position(col).column).assume_mutable()->clear();
Review Comment:
warning: std::move of the const expression has no effect; remove std::move()
[performance-move-const-arg]
```suggestion
*block->get_by_position(col).column.assume_mutable()->clear();
```
##########
be/src/vec/data_types/data_type_quantilestate.h:
##########
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "util/quantile_state.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_complex.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+namespace doris::vectorized {
+template <typename T>
+class DataTypeQuantileState : public IDataType {
+public:
+ DataTypeQuantileState() = default;
+ ~DataTypeQuantileState() override = default;
+ using ColumnType = ColumnQuantileState<T>;
+ using FieldType = QuantileState<T>;
+
+ std::string do_get_name() const override { return get_family_name(); }
+ const char* get_family_name() const override { return "QuantileState"; }
+
+ TypeIndex get_type_id() const override { return TypeIndex::QuantileState; }
+ int64_t get_uncompressed_serialized_bytes(const IColumn& column,
+ int be_exec_version) const
override;
+ char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override;
+ const char* deserialize(const char* buf, IColumn* column, int
be_exec_version) const override;
+
+ MutableColumnPtr create_column() const override;
+
+ bool get_is_parametric() const override { return false; }
+ bool have_subtypes() const override { return false; }
+ bool should_align_right_in_pretty_formats() const override { return false;
}
+ bool text_can_contain_only_valid_utf8() const override { return true; }
+ bool is_comparable() const override { return false; }
+ bool is_value_represented_by_number() const override { return false; }
+ bool is_value_represented_by_integer() const override { return false; }
+ bool is_value_represented_by_unsigned_integer() const override { return
false; }
+ // TODO:
+ bool is_value_unambiguously_represented_in_contiguous_memory_region()
const override {
+ return true;
+ }
+ bool have_maximum_size_of_value() const override { return false; }
+
+ bool can_be_used_as_version() const override { return false; }
+
+ bool can_be_inside_nullable() const override { return true; }
+
+ bool equals(const IDataType& rhs) const override { return typeid(rhs) ==
typeid(*this); }
+
+ bool is_categorial() const override { return
is_value_represented_by_integer(); }
+
+ bool can_be_inside_low_cardinality() const override { return false; }
+
+ std::string to_string(const IColumn& column, size_t row_num) const
override {
+ return "QuantileState()";
+ }
+ void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
+
+ [[noreturn]] virtual Field get_default() const override {
Review Comment:
warning: 'virtual' is redundant since the function is already declared
'override' [modernize-use-override]
```suggestion
[[noreturn]] Field get_default() const override {
```
##########
be/src/util/bit_stream_utils.inline.h:
##########
@@ -195,22 +208,57 @@ inline bool BitReader::GetAligned(int num_bytes, T* v) {
return true;
}
-inline bool BitReader::GetVlqInt(int32_t* v) {
- *v = 0;
- int shift = 0;
- int num_bytes = 0;
- uint8_t byte = 0;
- do {
+inline bool BitReader::GetVlqInt(uint32_t* v) {
+ uint32_t tmp = 0;
+ for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN; num_bytes++) {
+ uint8_t byte = 0;
if (!GetAligned<uint8_t>(1, &byte)) return false;
- *v |= (byte & 0x7F) << shift;
- shift += 7;
- DCHECK_LE(++num_bytes, MAX_VLQ_BYTE_LEN);
- } while ((byte & 0x80) != 0);
+ tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * num_bytes);
+ if ((byte & 0x80) == 0) {
+ *v = tmp;
+ return true;
+ }
+ }
+ return false;
+}
+
+inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
+ uint32_t u;
+ if (!GetVlqInt(&u)) {
+ return false;
+ }
+ u = (u >> 1) ^ (~(u & 1) + 1);
+ // copy uint32_t to int32_t
+ std::memcpy(v, &u, sizeof(uint32_t));
+ return true;
+}
+
+inline bool BitReader::GetVlqInt(uint64_t* v) {
+ uint64_t tmp = 0;
+ for (int num_bytes = 0; num_bytes < MAX_VLQ_BYTE_LEN_FOR_INT64;
num_bytes++) {
+ uint8_t byte = 0;
+ if (!GetAligned<uint8_t>(1, &byte)) return false;
Review Comment:
warning: statement should be inside braces
[readability-braces-around-statements]
```suggestion
if (!GetAligned<uint8_t>(1, &byte)) { return false;
}
```
##########
be/src/vec/exec/scan/pip_scanner_context.h:
##########
@@ -33,20 +33,68 @@ class PipScannerContext : public vectorized::ScannerContext
{
: vectorized::ScannerContext(state, parent, input_tuple_desc,
output_tuple_desc,
scanners, limit,
max_bytes_in_blocks_queue) {}
- void _update_block_queue_empty() override { _blocks_queue_empty =
_blocks_queue.empty(); }
+ Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr*
block, bool* eos,
+ int id, bool wait = false) override {
+ {
+ std::unique_lock<std::mutex> l(_transfer_lock);
+ if (state->is_cancelled()) {
Review Comment:
warning: member access into incomplete type 'doris::RuntimeState'
[clang-diagnostic-error]
```cpp
if (state->is_cancelled()) {
^
```
**be/src/udf/udf.h:42:** forward declaration of 'doris::RuntimeState'
```cpp
class RuntimeState;
^
```
##########
be/test/vec/core/column_complex_test.cpp:
##########
@@ -106,4 +142,31 @@
check_serialize_and_deserialize(column);
}
+TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) {
+ auto column = _quantile_state_type.create_column();
Review Comment:
warning: '_quantile_state_type' is a private member of
'doris::vectorized::ColumnQuantileStateTest' [clang-diagnostic-error]
```cpp
auto column = _quantile_state_type.create_column();
^
```
**be/test/vec/core/column_complex_test.cpp:118:** declared private here
```cpp
DataTypeQuantileStateDouble _quantile_state_type;
^
```
##########
be/test/vec/core/column_complex_test.cpp:
##########
@@ -83,7 +84,42 @@
DataTypeBitMap _bitmap_type;
};
-TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+class ColumnQuantileStateTest : public testing::Test {
+public:
+ virtual void SetUp() override {}
+ virtual void TearDown() override {}
+
+ void check_bitmap_column(const IColumn& l, const IColumn& r) {
+ ASSERT_EQ(l.size(), r.size());
+ const auto& l_col = assert_cast<const ColumnQuantileStateDouble&>(l);
+ const auto& r_col = assert_cast<const ColumnQuantileStateDouble&>(r);
+ for (size_t i = 0; i < l_col.size(); ++i) {
+ auto& l_value =
const_cast<QuantileStateDouble&>(l_col.get_element(i));
+ auto& r_value =
const_cast<QuantileStateDouble&>(r_col.get_element(i));
+ ASSERT_EQ(l_value.get_serialized_size(),
r_value.get_serialized_size());
+ }
+ }
+
+ void check_serialize_and_deserialize(MutableColumnPtr& col) {
+ auto column = assert_cast<ColumnQuantileStateDouble*>(col.get());
+ auto size = _quantile_state_type.get_uncompressed_serialized_bytes(
+ *column, BeExecVersionManager::get_newest_version());
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ auto result = _quantile_state_type.serialize(*column, buf.get(),
+
BeExecVersionManager::get_newest_version());
+ ASSERT_EQ(result, buf.get() + size);
+
+ auto column2 = _quantile_state_type.create_column();
+ _quantile_state_type.deserialize(buf.get(), column2.get(),
+
BeExecVersionManager::get_newest_version());
+ check_bitmap_column(*column, *column2.get());
+ }
+
+private:
+ DataTypeQuantileStateDouble _quantile_state_type;
+};
+
+TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) {
auto column = _bitmap_type.create_column();
Review Comment:
warning: '_bitmap_type' is a private member of
'doris::vectorized::ColumnBitmapTest' [clang-diagnostic-error]
```cpp
auto column = _bitmap_type.create_column();
^
```
**be/test/vec/core/column_complex_test.cpp:83:** declared private here
```cpp
DataTypeBitMap _bitmap_type;
^
```
##########
be/test/vec/core/column_complex_test.cpp:
##########
@@ -83,7 +84,42 @@
DataTypeBitMap _bitmap_type;
};
-TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+class ColumnQuantileStateTest : public testing::Test {
+public:
+ virtual void SetUp() override {}
+ virtual void TearDown() override {}
Review Comment:
warning: 'virtual' is redundant since the function is already declared
'override' [modernize-use-override]
```suggestion
void TearDown() override {}
```
##########
be/src/vec/exprs/vschema_change_expr.h:
##########
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "vec/common/schema_util.h"
+#include "vec/data_types/data_type_object.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+// Sepecial type of expression which only acts on dynmiac column(ColumnObject)
+// it sends schema change rpc to Frontend to add new generately columns
+// from it's type and name.It contains an inner slot which indicated it's
variant
+// column.
+class VSchemaChangeExpr : public VExpr {
+public:
+ VSchemaChangeExpr(const TExprNode& node) : VExpr(node), _tnode(node) {}
+ ~VSchemaChangeExpr() = default;
Review Comment:
warning: annotate this function with 'override' or (rarely) 'final'
[modernize-use-override]
```suggestion
~VSchemaChangeExpr() override = default;
```
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1584,31 +1650,117 @@
for (int i = 0; i < block->columns(); i++) {
auto cid = _schema.column_id(i);
// todo(wb) abstract make column where
- if (!_is_pred_column[cid]) { // non-predicate
+ if (!_is_pred_column[cid]) {
block->replace_by_position(i,
std::move(_current_return_columns[cid]));
}
}
block->clear_column_data();
return Status::EndOfFile("no more data in segment");
}
- if (!_is_need_vec_eval && !_is_need_short_eval) {
- _replace_version_col(_current_batch_rows_read);
+ if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
_output_non_pred_columns(block);
_output_index_result_column(nullptr, 0, block);
} else {
- _convert_dict_code_for_predicate_if_necessary();
uint16_t selected_size = _current_batch_rows_read;
uint16_t sel_rowid_idx[selected_size];
- // step 1: evaluate vectorization predicate
- selected_size = _evaluate_vectorization_predicate(sel_rowid_idx,
selected_size);
+ if (_is_need_vec_eval || _is_need_short_eval) {
+ _convert_dict_code_for_predicate_if_necessary();
+
+ // step 1: evaluate vectorization predicate
+ selected_size = _evaluate_vectorization_predicate(sel_rowid_idx,
selected_size);
+
+ // step 2: evaluate short circuit predicate
+ // todo(wb) research whether need to read short predicate after
vectorization evaluation
+ // to reduce cost of read short circuit columns.
+ // In SSB test, it make no difference; So need more
scenarios to test
+ selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx,
selected_size);
- // step 2: evaluate short circuit predicate
- // todo(wb) research whether need to read short predicate after
vectorization evaluation
- // to reduce cost of read short circuit columns.
- // In SSB test, it make no difference; So need more scenarios
to test
- selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx,
selected_size);
+ if (selected_size > 0) {
+ // step 3.1: output short circuit and predicate column
+ // when lazy materialization enables, _first_read_column_ids =
distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+ // see _vec_init_lazy_materialization
+ // todo(wb) need to tell input columnids from output columnids
+ RETURN_IF_ERROR(_output_column_by_sel_idx(block,
_first_read_column_ids,
+ sel_rowid_idx,
selected_size));
+
+ // step 3.2: read remaining expr column and evaluate it.
+ if (_is_need_expr_eval) {
+ // The predicate column contains the remaining expr
column, no need second read.
+ if (!_second_read_column_ids.empty()) {
+ SCOPED_RAW_TIMER(&_opts.stats->second_read_ns);
+ RETURN_IF_ERROR(_read_columns_by_rowids(
+ _second_read_column_ids, _block_rowids,
sel_rowid_idx,
+ selected_size, &_current_return_columns));
+ if (std::find(_second_read_column_ids.begin(),
+ _second_read_column_ids.end(),
+ _schema.version_col_idx()) !=
_second_read_column_ids.end()) {
+ _replace_version_col(selected_size);
+ }
+ for (auto cid : _second_read_column_ids) {
+ auto loc = _schema_block_id_map[cid];
+ block->replace_by_position(loc,
+
std::move(_current_return_columns[cid]));
+ }
+ }
+
+ DCHECK(block->columns() >
_schema_block_id_map[*_common_expr_columns.begin()]);
+ // block->rows() takes the size of the first column by
default. If the first column is no predicate column,
+ // it has not been read yet. add a const column that has
been read to calculate rows().
+ if (block->rows() == 0) {
+ vectorized::MutableColumnPtr col0 =
+
std::move(*block->get_by_position(0).column).mutate();
+ auto res_column = vectorized::ColumnString::create();
+ res_column->insert_data("", 0);
+ auto col_const =
vectorized::ColumnConst::create(std::move(res_column),
+
selected_size);
+ block->replace_by_position(0, std::move(col_const));
+ _output_index_result_column(sel_rowid_idx,
selected_size, block);
+
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
+ RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx,
selected_size, block));
+ block->replace_by_position(0, std::move(col0));
+ } else {
+ _output_index_result_column(sel_rowid_idx,
selected_size, block);
+
block->shrink_char_type_column_suffix_zero(_char_type_idx);
+ RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx,
selected_size, block));
+ }
+ }
+ } else if (_is_need_expr_eval) {
+ for (auto cid : _second_read_column_ids) {
+ auto loc = _schema_block_id_map[cid];
+ block->replace_by_position(loc,
std::move(_current_return_columns[cid]));
+ }
+ }
+ } else if (_is_need_expr_eval) {
+ DCHECK(!_first_read_column_ids.empty());
+ // first read all rows are insert block, initialize sel_rowid_idx
to all rows.
+ for (auto cid : _first_read_column_ids) {
+ auto loc = _schema_block_id_map[cid];
+ block->replace_by_position(loc,
std::move(_current_return_columns[cid]));
+ }
+ for (uint32_t i = 0; i < selected_size; ++i) {
+ sel_rowid_idx[i] = i;
+ }
+
+ if (block->rows() == 0) {
+ vectorized::MutableColumnPtr col0 =
+ std::move(*block->get_by_position(0).column).mutate();
Review Comment:
warning: std::move of the const expression has no effect; remove std::move()
[performance-move-const-arg]
```suggestion
*block->get_by_position(0).column.mutate();
```
##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1584,31 +1650,117 @@ Status SegmentIterator::next_batch(vectorized::Block*
block) {
for (int i = 0; i < block->columns(); i++) {
auto cid = _schema.column_id(i);
// todo(wb) abstract make column where
- if (!_is_pred_column[cid]) { // non-predicate
+ if (!_is_pred_column[cid]) {
block->replace_by_position(i,
std::move(_current_return_columns[cid]));
}
}
block->clear_column_data();
return Status::EndOfFile("no more data in segment");
}
- if (!_is_need_vec_eval && !_is_need_short_eval) {
- _replace_version_col(_current_batch_rows_read);
+ if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
_output_non_pred_columns(block);
_output_index_result_column(nullptr, 0, block);
} else {
- _convert_dict_code_for_predicate_if_necessary();
uint16_t selected_size = _current_batch_rows_read;
uint16_t sel_rowid_idx[selected_size];
- // step 1: evaluate vectorization predicate
- selected_size = _evaluate_vectorization_predicate(sel_rowid_idx,
selected_size);
+ if (_is_need_vec_eval || _is_need_short_eval) {
+ _convert_dict_code_for_predicate_if_necessary();
+
+ // step 1: evaluate vectorization predicate
+ selected_size = _evaluate_vectorization_predicate(sel_rowid_idx,
selected_size);
+
+ // step 2: evaluate short circuit predicate
+ // todo(wb) research whether need to read short predicate after
vectorization evaluation
+ // to reduce cost of read short circuit columns.
+ // In SSB test, it make no difference; So need more
scenarios to test
+ selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx,
selected_size);
- // step 2: evaluate short circuit predicate
- // todo(wb) research whether need to read short predicate after
vectorization evaluation
- // to reduce cost of read short circuit columns.
- // In SSB test, it make no difference; So need more scenarios
to test
- selected_size = _evaluate_short_circuit_predicate(sel_rowid_idx,
selected_size);
+ if (selected_size > 0) {
+ // step 3.1: output short circuit and predicate column
+ // when lazy materialization enables, _first_read_column_ids =
distinct(_short_cir_pred_column_ids + _vec_pred_column_ids)
+ // see _vec_init_lazy_materialization
+ // todo(wb) need to tell input columnids from output columnids
+ RETURN_IF_ERROR(_output_column_by_sel_idx(block,
_first_read_column_ids,
+ sel_rowid_idx,
selected_size));
+
+ // step 3.2: read remaining expr column and evaluate it.
+ if (_is_need_expr_eval) {
+ // The predicate column contains the remaining expr
column, no need second read.
+ if (!_second_read_column_ids.empty()) {
+ SCOPED_RAW_TIMER(&_opts.stats->second_read_ns);
+ RETURN_IF_ERROR(_read_columns_by_rowids(
+ _second_read_column_ids, _block_rowids,
sel_rowid_idx,
+ selected_size, &_current_return_columns));
+ if (std::find(_second_read_column_ids.begin(),
+ _second_read_column_ids.end(),
+ _schema.version_col_idx()) !=
_second_read_column_ids.end()) {
+ _replace_version_col(selected_size);
+ }
+ for (auto cid : _second_read_column_ids) {
+ auto loc = _schema_block_id_map[cid];
+ block->replace_by_position(loc,
+
std::move(_current_return_columns[cid]));
+ }
+ }
+
+ DCHECK(block->columns() >
_schema_block_id_map[*_common_expr_columns.begin()]);
+ // block->rows() takes the size of the first column by
default. If the first column is no predicate column,
+ // it has not been read yet. add a const column that has
been read to calculate rows().
+ if (block->rows() == 0) {
+ vectorized::MutableColumnPtr col0 =
+
std::move(*block->get_by_position(0).column).mutate();
Review Comment:
warning: std::move of the const expression has no effect; remove std::move()
[performance-move-const-arg]
```suggestion
*block->get_by_position(0).column.mutate();
```
##########
be/test/vec/aggregate_functions/agg_histogram_test.cpp:
##########
@@ -115,8 +110,7 @@ class VAggHistogramTest : public testing::Test {
std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
AggregateDataPtr place = memory.get();
agg_function->create(place);
- agg_histogram_add_elements<DataType>(agg_function, place, input_rows,
sample_rate,
- max_bucket_num);
+ agg_histogram_add_elements<DataType>(agg_function, place, input_rows,
max_num_buckets);
ColumnString buf;
Review Comment:
warning: calling a private constructor of class
'doris::vectorized::ColumnString' [clang-diagnostic-error]
```cpp
ColumnString buf;
^
```
**be/src/vec/columns/column_string.h:76:** declared private here
```cpp
ColumnString() = default;
^
```
##########
be/test/vec/core/column_complex_test.cpp:
##########
@@ -83,7 +84,42 @@ class ColumnBitmapTest : public testing::Test {
DataTypeBitMap _bitmap_type;
};
-TEST_F(ColumnBitmapTest, SerializeAndDeserialize) {
+class ColumnQuantileStateTest : public testing::Test {
+public:
+ virtual void SetUp() override {}
Review Comment:
warning: 'virtual' is redundant since the function is already declared
'override' [modernize-use-override]
```suggestion
void SetUp() override {}
```
##########
be/src/vec/functions/array/function_array_enumerate_uniq.cpp:
##########
@@ -0,0 +1,250 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <vec/columns/column_array.h>
+#include <vec/columns/column_nullable.h>
+#include <vec/columns/columns_number.h>
+#include <vec/common/columns_hashing.h>
+#include <vec/common/hash_table/hash_map.h>
+#include <vec/data_types/data_type_array.h>
+#include <vec/data_types/data_type_number.h>
+#include <vec/functions/function.h>
+#include <vec/functions/function_helpers.h>
+#include <vec/functions/simple_function_factory.h>
+
+namespace doris::vectorized {
+
+class FunctionArrayEnumerateUniq : public IFunction {
+private:
+ static constexpr size_t INITIAL_SIZE_DEGREE = 5;
+
+public:
+ using NullMapType = PaddedPODArray<UInt8>;
+ static constexpr auto name = "array_enumerate_uniq";
+ static FunctionPtr create() { return
std::make_shared<FunctionArrayEnumerateUniq>(); }
+ String get_name() const override { return name; }
+ bool is_variadic() const override { return true; }
+ size_t get_number_of_arguments() const override { return 1; }
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ if (arguments.empty()) {
+ LOG(FATAL) << "Incorrect number of arguments for
array_enumerate_uniq function";
+ }
+ bool is_nested_nullable = false;
+ for (size_t i = 0; i < arguments.size(); ++i) {
+ const DataTypeArray* array_type =
+
check_and_get_data_type<DataTypeArray>(remove_nullable(arguments[i]).get());
+ if (!array_type) {
+ LOG(FATAL) << "The " << i
+ << "-th argument for function " + get_name() +
+ " must be an array but it has type " +
+ arguments[i]->get_name() + ".";
+ }
+ if (i == 0) {
+ is_nested_nullable =
array_type->get_nested_type()->is_nullable();
+ }
+ }
+
+ auto return_nested_type = std::make_shared<DataTypeInt64>();
+ DataTypePtr return_type = std::make_shared<DataTypeArray>(
+ is_nested_nullable ? make_nullable(return_nested_type) :
return_nested_type);
+ if (arguments.size() == 1 && arguments[0]->is_nullable()) {
+ return_type = make_nullable(return_type);
+ }
+ return return_type;
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ ColumnRawPtrs data_columns(arguments.size());
+ const ColumnArray::Offsets64* offsets = nullptr;
+ ColumnPtr src_offsets;
+ Columns src_columns; // to keep ownership
+
+ const ColumnArray* first_column_array = nullptr;
+
+ for (size_t i = 0; i < arguments.size(); i++) {
+ src_columns.emplace_back(
+
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const());
+ ColumnPtr& cur_column = src_columns[i];
+ const ColumnArray* array =
+
check_and_get_column<ColumnArray>(remove_nullable(cur_column->get_ptr()));
+ if (!array) {
+ return Status::RuntimeError(
+ fmt::format("Illegal column {}, of first argument of
function {}",
+ cur_column->get_name(), get_name()));
+ }
+
+ const ColumnArray::Offsets64& cur_offsets = array->get_offsets();
+ if (i == 0) {
+ first_column_array = array;
+ offsets = &cur_offsets;
+ src_offsets = array->get_offsets_ptr();
+ } else if (*offsets != cur_offsets) {
+ return Status::RuntimeError(fmt::format(
+ "lengths of all arrays of fucntion {} must be equal.",
get_name()));
+ }
+ const auto* array_data = &array->get_data();
+ data_columns[i] = array_data;
+ }
+
+ const NullMapType* null_map = nullptr;
+ if (arguments.size() == 1 && data_columns[0]->is_nullable()) {
+ const ColumnNullable* nullable =
check_and_get_column<ColumnNullable>(*data_columns[0]);
+ data_columns[0] = nullable->get_nested_column_ptr();
+ null_map = &nullable->get_null_map_column().get_data();
+ }
+
+ auto dst_nested_column = ColumnInt64::create();
+ ColumnInt64::Container& dst_values = dst_nested_column->get_data();
+ dst_values.resize(offsets->back());
+
+ if (arguments.size() == 1) {
+ DataTypePtr src_column_type =
block.get_by_position(arguments[0]).type;
+ if (src_column_type->is_nullable()) {
+ src_column_type =
+ assert_cast<const
DataTypeNullable&>(*src_column_type).get_nested_type();
+ }
+ auto nested_type =
+ assert_cast<const
DataTypeArray&>(*src_column_type).get_nested_type();
+ WhichDataType which(remove_nullable(nested_type));
+ if (which.is_uint8()) {
+ _execute_number<ColumnUInt8>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_int8()) {
+ _execute_number<ColumnInt8>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_int16()) {
+ _execute_number<ColumnInt16>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_int32()) {
+ _execute_number<ColumnInt32>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_int64()) {
+ _execute_number<ColumnInt64>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_int128()) {
+ _execute_number<ColumnInt128>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_float32()) {
+ _execute_number<ColumnFloat32>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_float64()) {
+ _execute_number<ColumnFloat64>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_date()) {
+ _execute_number<ColumnDate>(data_columns, *offsets, null_map,
dst_values);
+ } else if (which.is_date_time()) {
+ _execute_number<ColumnDateTime>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_date_v2()) {
+ _execute_number<ColumnDateV2>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_date_time_v2()) {
+ _execute_number<ColumnDateTimeV2>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_decimal128()) {
+ _execute_number<ColumnDecimal128>(data_columns, *offsets,
null_map, dst_values);
+ } else if (which.is_string()) {
+ _execute_string(data_columns, *offsets, null_map, dst_values);
+ }
+ } else {
+ using HashMapContainer =
+ HashMapWithStackMemory<UInt128, Int64, UInt128TrivialHash,
INITIAL_SIZE_DEGREE>;
+ using HashMethod =
ColumnsHashing::HashMethodHashed<DataTypeUInt64, Int64, false>;
+ _execute_by_hash<HashMapContainer, HashMethod,
false>(data_columns, *offsets, nullptr,
+ dst_values);
+ }
+
+ ColumnPtr nested_column = dst_nested_column->get_ptr();
+ if (first_column_array->get_data().is_nullable()) {
+ nested_column = ColumnNullable::create(nested_column,
+
ColumnUInt8::create(nested_column->size(), 0));
+ }
+ ColumnPtr res_column = ColumnArray::create(std::move(nested_column),
src_offsets);
Review Comment:
warning: passing result of std::move() as a const reference argument; no
move will actually happen [performance-move-const-arg]
```suggestion
ColumnPtr res_column = ColumnArray::create(nested_column,
src_offsets);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]