This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fe7ff6f113d [Opt](functions) Opt tvf number for performance regression framework (#27582)
fe7ff6f113d is described below
commit fe7ff6f113d48e76613785d0d2a392bf83b08cca
Author: zclllyybb <[email protected]>
AuthorDate: Tue Nov 28 10:43:51 2023 +0800
[Opt](functions) Opt tvf number for performance regression framework (#27582)
Opt tvf number for performance regression framework
---
be/src/vec/columns/column_vector.h | 12 +++-
.../vec/exec/data_gen_functions/vnumbers_tvf.cpp | 67 +++++++++++-----------
be/src/vec/exec/data_gen_functions/vnumbers_tvf.h | 2 +-
3 files changed, 46 insertions(+), 35 deletions(-)
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
index 00a49835c6b..77df238d2aa 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -144,7 +144,7 @@ public:
using Container = PaddedPODArray<value_type>;
private:
- ColumnVector() {}
+ ColumnVector() = default;
ColumnVector(const size_t n) : data(n) {}
ColumnVector(const size_t n, const value_type x) : data(n, x) {}
ColumnVector(const ColumnVector& src) : data(src.data.begin(), src.data.end()) {}
@@ -177,7 +177,7 @@ public:
size_t size() const override { return data.size(); }
StringRef get_data_at(size_t n) const override {
- return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
+ return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])};
}
void insert_from(const IColumn& src, size_t n) override {
@@ -195,6 +195,14 @@ public:
memcpy(data.data() + old_size, data_ptr, num * sizeof(T));
}
+ void insert_range_of_integer(T begin, T end) {
+ auto old_size = data.size();
+ data.resize(old_size + (end - begin));
+ for (int i = 0; i < end - begin; i++) {
+ data[old_size + i] = begin + i;
+ }
+ }
+
void insert_date_column(const char* data_ptr, size_t num) {
data.reserve(data.size() + num);
constexpr size_t input_value_size = sizeof(uint24_t);
diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
index 2ac6c0fca42..4c37dc3a35e 100644
--- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
+++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp
@@ -21,14 +21,18 @@
#include <gen_cpp/PlanNodes_types.h>
#include <glog/logging.h>
+#include <algorithm>
#include <utility>
#include "common/status.h"
#include "runtime/descriptors.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
namespace doris::vectorized {
@@ -37,45 +41,44 @@ VNumbersTVF::VNumbersTVF(TupleId tuple_id, const TupleDescriptor* tuple_desc)
: VDataGenFunctionInf(tuple_id, tuple_desc) {}
Status VNumbersTVF::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
- bool mem_reuse = block->mem_reuse();
DCHECK(block->rows() == 0);
+ RETURN_IF_CANCELLED(state);
+ bool mem_reuse = block->mem_reuse();
+ int batch_size = state->batch_size();
std::vector<vectorized::MutableColumnPtr> columns(_slot_num);
- do {
- for (int i = 0; i < _slot_num; ++i) {
- if (mem_reuse) {
- columns[i] = std::move(*(block->get_by_position(i).column)).mutate();
- } else {
- columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
- }
+ // now only support one column for tvf numbers
+ for (int i = 0; i < _slot_num; ++i) {
+ if (mem_reuse) {
+ columns[i] = std::move(*(block->get_by_position(i).column)).mutate();
+ } else {
+ columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
}
- while (true) {
- RETURN_IF_CANCELLED(state);
- int batch_size = state->batch_size();
- if (columns[0]->size() == batch_size) {
- // what if batch_size < _total_numbers, should we set *eos?
- break;
- }
- // if _total_numbers == 0, so we can break loop at now.
- if (_cur_offset >= _total_numbers) {
- *eos = true;
- break;
- }
- columns[0]->insert_data(reinterpret_cast<const char*>(&_cur_offset),
- sizeof(_cur_offset));
- ++_cur_offset;
+
+ if (_total_numbers <= 0) [[unlikely]] {
+ *eos = true;
+ continue;
}
- auto n_columns = 0;
- if (!mem_reuse) {
- for (const auto slot_desc : _tuple_desc->slots()) {
- block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
- slot_desc->get_data_type_ptr(),
- slot_desc->col_name()));
- }
+ auto* column_res = assert_cast<ColumnInt64*>(columns[i].get()); //BIGINT
+ int64_t end_value = std::min((int64_t)(_next_number + batch_size), _total_numbers);
+ column_res->insert_range_of_integer(_next_number, end_value);
+ if (end_value == _total_numbers) {
+ *eos = true;
} else {
- columns.clear();
+ _next_number = end_value;
+ }
+ }
+
+ if (mem_reuse) {
+ columns.clear();
+ } else {
+ size_t n_columns = 0;
+ for (const auto* slot_desc : _tuple_desc->slots()) {
+ block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
+ slot_desc->get_data_type_ptr(),
+ slot_desc->col_name()));
}
- } while (block->rows() == 0 && !(*eos));
+ }
return Status::OK();
}
diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
index f65a777051b..310571ee765 100644
--- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
+++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h
@@ -47,7 +47,7 @@ protected:
int64_t _total_numbers;
// Number of returned columns, actually only 1 column
int _slot_num = 1;
- int64_t _cur_offset = 0;
+ int64_t _next_number = 0;
};
} // namespace vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]