This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 021678c7c30 [fix](window_funnel) fix wrong result of window_funnel
#38954 (#39270)
021678c7c30 is described below
commit 021678c7c302abab23a3705f5a114b6f9ca7f5e6
Author: TengJianPing <[email protected]>
AuthorDate: Fri Aug 16 09:59:31 2024 +0800
[fix](window_funnel) fix wrong result of window_funnel #38954 (#39270)
## Proposed changes
BP #38954
---
be/src/agent/be_exec_version_manager.h | 2 +-
be/src/util/simd/bits.h | 16 +
.../aggregate_function_simple_factory.cpp | 2 +
.../aggregate_function_window_funnel.cpp | 37 +-
.../aggregate_function_window_funnel.h | 387 ++++++++++++++++++++-
.../aggregate_functions/vec_window_funnel_test.cpp | 9 +-
.../main/java/org/apache/doris/common/Config.java | 2 +-
.../data/nereids_p0/aggregate/window_funnel.out | 99 ++++++
.../window_functions/window_funnel.out | 5 +
.../nereids_p0/aggregate/window_funnel.groovy | 361 +++++++++++++++++++
.../window_functions/window_funnel.sql | 71 ++--
11 files changed, 928 insertions(+), 63 deletions(-)
diff --git a/be/src/agent/be_exec_version_manager.h
b/be/src/agent/be_exec_version_manager.h
index 248bcfc7e1a..4cd1a85622c 100644
--- a/be/src/agent/be_exec_version_manager.h
+++ b/be/src/agent/be_exec_version_manager.h
@@ -71,7 +71,7 @@ private:
* g. do local merge of remote runtime filter
* h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS
*/
-constexpr inline int BeExecVersionManager::max_be_exec_version = 4;
+constexpr inline int BeExecVersionManager::max_be_exec_version = 5;
constexpr inline int BeExecVersionManager::min_be_exec_version = 0;
/// functional
diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h
index 45f82b23ac9..a36a95b6eef 100644
--- a/be/src/util/simd/bits.h
+++ b/be/src/util/simd/bits.h
@@ -136,6 +136,18 @@ static size_t find_byte(const std::vector<T>& vec, size_t
start, T byte) {
return (T*)p - vec.data();
}
+template <class T>
+static size_t find_byte(const T* data, size_t start, size_t end, T byte) {
+ if (start >= end) {
+ return start;
+ }
+ const void* p = std::memchr((const void*)(data + start), byte, end -
start);
+ if (p == nullptr) {
+ return end;
+ }
+ return (T*)p - data;
+}
+
template <typename T>
bool contain_byte(const T* __restrict data, const size_t length, const signed
char byte) {
return nullptr != std::memchr(reinterpret_cast<const void*>(data), byte,
length);
@@ -145,6 +157,10 @@ inline size_t find_one(const std::vector<uint8_t>& vec,
size_t start) {
return find_byte<uint8_t>(vec, start, 1);
}
+inline size_t find_one(const uint8_t* data, size_t start, size_t end) {
+ return find_byte<uint8_t>(data, start, end, 1);
+}
+
inline size_t find_zero(const std::vector<uint8_t>& vec, size_t start) {
return find_byte<uint8_t>(vec, start, 0);
}
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
index cbae8cd28fe..f91311cdf92 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -54,6 +54,7 @@ void
register_aggregate_function_group_array_intersect(AggregateFunctionSimpleFa
void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_percentile(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory&
factory);
+void
register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_retention(AggregateFunctionSimpleFactory&
factory);
void
register_aggregate_function_percentile_approx(AggregateFunctionSimpleFactory&
factory);
void
register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory&
factory);
@@ -96,6 +97,7 @@ AggregateFunctionSimpleFactory&
AggregateFunctionSimpleFactory::instance() {
register_aggregate_function_percentile(instance);
register_aggregate_function_percentile_approx(instance);
register_aggregate_function_window_funnel(instance);
+ register_aggregate_function_window_funnel_old(instance);
register_aggregate_function_retention(instance);
register_aggregate_function_orthogonal_bitmap(instance);
register_aggregate_function_collect_list(instance);
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp
b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp
index 6d9ca27fe10..afd6ceb569c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp
@@ -24,11 +24,33 @@
#include "common/logging.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"
namespace doris::vectorized {
+AggregateFunctionPtr create_aggregate_function_window_funnel_old(const
std::string& name,
+ const
DataTypes& argument_types,
+ const bool
result_is_nullable) {
+ if (argument_types.size() < 3) {
+ LOG(WARNING) << "window_funnel's argument less than 3.";
+ return nullptr;
+ }
+ if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) {
+ return creator_without_type::create<
+
AggregateFunctionWindowFunnelOld<DateV2Value<DateTimeV2ValueType>, UInt64>>(
+ argument_types, result_is_nullable);
+ } else if
(WhichDataType(remove_nullable(argument_types[2])).is_date_time()) {
+ return creator_without_type::create<
+ AggregateFunctionWindowFunnelOld<VecDateTimeValue,
Int64>>(argument_types,
+
result_is_nullable);
+ } else {
+ LOG(WARNING) << "Only support DateTime type as window argument!";
+ return nullptr;
+ }
+}
+
AggregateFunctionPtr create_aggregate_function_window_funnel(const
std::string& name,
const DataTypes&
argument_types,
const bool
result_is_nullable) {
@@ -38,11 +60,12 @@ AggregateFunctionPtr
create_aggregate_function_window_funnel(const std::string&
}
if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) {
return creator_without_type::create<
-
AggregateFunctionWindowFunnel<DateV2Value<DateTimeV2ValueType>, UInt64>>(
- argument_types, result_is_nullable);
+ AggregateFunctionWindowFunnel<TypeIndex::DateTimeV2,
UInt64>>(argument_types,
+
result_is_nullable);
} else if
(WhichDataType(remove_nullable(argument_types[2])).is_date_time()) {
- return
creator_without_type::create<AggregateFunctionWindowFunnel<VecDateTimeValue,
Int64>>(
- argument_types, result_is_nullable);
+ return creator_without_type::create<
+ AggregateFunctionWindowFunnel<TypeIndex::DateTime,
Int64>>(argument_types,
+
result_is_nullable);
} else {
LOG(WARNING) << "Only support DateTime type as window argument!";
return nullptr;
@@ -52,4 +75,10 @@ AggregateFunctionPtr
create_aggregate_function_window_funnel(const std::string&
void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory&
factory) {
factory.register_function_both("window_funnel",
create_aggregate_function_window_funnel);
}
+void
register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory&
factory) {
+ factory.register_alternative_function("window_funnel",
+
create_aggregate_function_window_funnel_old, true);
+ factory.register_alternative_function("window_funnel",
+
create_aggregate_function_window_funnel_old, false);
+}
} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
index 9356cfd4b68..3ab93646775 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h
@@ -21,26 +21,29 @@
#pragma once
-#include <stddef.h>
-#include <stdint.h>
+#include <gen_cpp/data.pb.h>
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <iterator>
#include <memory>
#include <optional>
+#include <type_traits>
#include <utility>
-#include <vector>
#include "agent/be_exec_version_manager.h"
#include "common/compiler_util.h"
+#include "common/exception.h"
#include "util/binary_cast.hpp"
+#include "util/simd/bits.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column_string.h"
-#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
+#include "vec/core/sort_block.h"
+#include "vec/core/sort_description.h"
#include "vec/core/types.h"
+#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_number.h"
#include "vec/io/var_int.h"
#include "vec/runtime/vdatetime_value.h"
@@ -72,8 +75,359 @@ WindowFunnelMode string_to_window_funnel_mode(const String&
string) {
}
}
-template <typename DateValueType, typename NativeType>
+template <TypeIndex TYPE_INDEX, typename NativeType>
struct WindowFunnelState {
+ using DateValueType = std::conditional_t<TYPE_INDEX ==
TypeIndex::DateTimeV2,
+ DateV2Value<DateTimeV2ValueType>,
VecDateTimeValue>;
+ int event_count = 0;
+ int64_t window;
+ bool enable_mode;
+ WindowFunnelMode window_funnel_mode;
+ mutable vectorized::MutableBlock mutable_block;
+ ColumnVector<NativeType>::Container* timestamp_column_data;
+ std::vector<ColumnVector<UInt8>::Container*> event_columns_datas;
+ SortDescription sort_description {1};
+ bool sorted;
+
+ WindowFunnelState() {
+ event_count = 0;
+ window = 0;
+ window_funnel_mode = WindowFunnelMode::INVALID;
+
+ sort_description[0].column_number = 0;
+ sort_description[0].direction = 1;
+ sort_description[0].nulls_direction = -1;
+ sorted = false;
+ }
+ WindowFunnelState(int arg_event_count) : WindowFunnelState() {
+ event_count = arg_event_count;
+ auto timestamp_column = ColumnVector<NativeType>::create();
+ timestamp_column_data =
+
&assert_cast<ColumnVector<NativeType>&>(*timestamp_column).get_data();
+
+ MutableColumns event_columns;
+ for (int i = 0; i < event_count; i++) {
+ auto event_column = ColumnVector<UInt8>::create();
+ event_columns_datas.emplace_back(
+
&assert_cast<ColumnVector<UInt8>&>(*event_column).get_data());
+ event_columns.emplace_back(std::move(event_column));
+ }
+ Block tmp_block;
+ tmp_block.insert({std::move(timestamp_column),
+
DataTypeFactory::instance().create_data_type(TYPE_INDEX), "timestamp"});
+ for (int i = 0; i < event_count; i++) {
+ tmp_block.insert({std::move(event_columns[i]),
+
DataTypeFactory::instance().create_data_type(TypeIndex::UInt8),
+ "event_" + std::to_string(i)});
+ }
+
+ mutable_block = MutableBlock(std::move(tmp_block));
+ }
+
+ void reset() {
+ window = 0;
+ mutable_block.clear();
+ timestamp_column_data = nullptr;
+ event_columns_datas.clear();
+ sorted = false;
+ }
+
+ void add(const IColumn** arg_columns, ssize_t row_num, int64_t win,
WindowFunnelMode mode) {
+ window = win;
+ window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT;
+
+ timestamp_column_data->push_back(
+ assert_cast<const
ColumnVector<NativeType>&>(*arg_columns[2]).get_data()[row_num]);
+ for (int i = 0; i < event_count; i++) {
+ event_columns_datas[i]->push_back(
+ assert_cast<const ColumnVector<UInt8>&>(*arg_columns[3 +
i])
+ .get_data()[row_num]);
+ }
+ }
+
+ void sort() {
+ if (sorted) {
+ return;
+ }
+
+ Block tmp_block = mutable_block.to_block();
+ auto block = tmp_block.clone_without_columns();
+ sort_block(tmp_block, block, sort_description, 0);
+ mutable_block = MutableBlock(std::move(block));
+ sorted = true;
+ }
+
+ template <WindowFunnelMode WINDOW_FUNNEL_MODE>
+ int _match_event_list(size_t& start_row, size_t row_count,
+ const NativeType* timestamp_data) const {
+ int matched_count = 0;
+ DateValueType start_timestamp;
+ DateValueType end_timestamp;
+ TimeInterval interval(SECOND, window, false);
+
+ int column_idx = 1;
+ const auto& first_event_column =
mutable_block.get_column_by_position(column_idx);
+ const auto& first_event_data =
+ assert_cast<const
ColumnVector<UInt8>&>(*first_event_column).get_data();
+ auto match_row = simd::find_one(first_event_data.data(), start_row,
row_count);
+ start_row = match_row + 1;
+ if (match_row < row_count) {
+ auto prev_timestamp = binary_cast<NativeType,
DateValueType>(timestamp_data[match_row]);
+ end_timestamp = prev_timestamp;
+ end_timestamp.template date_add_interval<SECOND>(interval);
+
+ matched_count++;
+
+ column_idx++;
+ auto last_match_row = match_row;
+ for (; column_idx < event_count + 1; column_idx++) {
+ const auto& event_column =
mutable_block.get_column_by_position(column_idx);
+ const auto& event_data =
+ assert_cast<const
ColumnVector<UInt8>&>(*event_column).get_data();
+ if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::FIXED) {
+ ++match_row;
+ if (event_data[match_row] == 1) {
+ auto current_timestamp =
+ binary_cast<NativeType,
DateValueType>(timestamp_data[match_row]);
+ if (current_timestamp <= end_timestamp) {
+ matched_count++;
+ continue;
+ }
+ }
+ break;
+ }
+ match_row = simd::find_one(event_data.data(), match_row + 1,
row_count);
+ if (match_row < row_count) {
+ auto current_timestamp =
+ binary_cast<NativeType,
DateValueType>(timestamp_data[match_row]);
+ bool is_matched = current_timestamp <= end_timestamp;
+ if (is_matched) {
+ if constexpr (WINDOW_FUNNEL_MODE ==
WindowFunnelMode::INCREASE) {
+ is_matched = current_timestamp > prev_timestamp;
+ }
+ }
+ if (!is_matched) {
+ break;
+ }
+ if constexpr (WINDOW_FUNNEL_MODE ==
WindowFunnelMode::INCREASE) {
+ prev_timestamp =
+ binary_cast<NativeType,
DateValueType>(timestamp_data[match_row]);
+ }
+ if constexpr (WINDOW_FUNNEL_MODE ==
WindowFunnelMode::DEDUPLICATION) {
+ bool is_dup = false;
+ if (match_row != last_match_row + 1) {
+ for (int tmp_column_idx = 1; tmp_column_idx <
column_idx;
+ tmp_column_idx++) {
+ const auto& tmp_event_column =
+
mutable_block.get_column_by_position(tmp_column_idx);
+ const auto& tmp_event_data =
+ assert_cast<const
ColumnVector<UInt8>&>(*tmp_event_column)
+ .get_data();
+ auto dup_match_row =
simd::find_one(tmp_event_data.data(),
+
last_match_row + 1, match_row);
+ if (dup_match_row < match_row) {
+ is_dup = true;
+ break;
+ }
+ }
+ }
+ if (is_dup) {
+ break;
+ }
+ last_match_row = match_row;
+ }
+ matched_count++;
+ } else {
+ break;
+ }
+ }
+ }
+ return matched_count;
+ }
+
+ template <WindowFunnelMode WINDOW_FUNNEL_MODE>
+ int _get_internal() const {
+ size_t start_row = 0;
+ int max_found_event_count = 0;
+ const auto& ts_column =
mutable_block.get_column_by_position(0)->get_ptr();
+ const auto& timestamp_data =
+ assert_cast<const
ColumnVector<NativeType>&>(*ts_column).get_data().data();
+
+ auto row_count = mutable_block.rows();
+ while (start_row < row_count) {
+ auto found_event_count =
+ _match_event_list<WINDOW_FUNNEL_MODE>(start_row,
row_count, timestamp_data);
+ if (found_event_count == event_count) {
+ return found_event_count;
+ }
+ max_found_event_count = std::max(max_found_event_count,
found_event_count);
+ }
+ return max_found_event_count;
+ }
+ int get() const {
+ auto row_count = mutable_block.rows();
+ if (event_count == 0 || row_count == 0) {
+ return 0;
+ }
+ switch (window_funnel_mode) {
+ case WindowFunnelMode::DEFAULT:
+ return _get_internal<WindowFunnelMode::DEFAULT>();
+ case WindowFunnelMode::DEDUPLICATION:
+ return _get_internal<WindowFunnelMode::DEDUPLICATION>();
+ case WindowFunnelMode::FIXED:
+ return _get_internal<WindowFunnelMode::FIXED>();
+ case WindowFunnelMode::INCREASE:
+ return _get_internal<WindowFunnelMode::INCREASE>();
+ default:
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid
window_funnel mode");
+ return 0;
+ }
+ }
+
+ void merge(const WindowFunnelState& other) {
+ if (!other.mutable_block.empty()) {
+ auto st = mutable_block.merge(other.mutable_block.to_block());
+ if (!st.ok()) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
st.to_string());
+ return;
+ }
+ }
+
+ event_count = event_count > 0 ? event_count : other.event_count;
+ window = window > 0 ? window : other.window;
+ if (enable_mode) {
+ window_funnel_mode = window_funnel_mode ==
WindowFunnelMode::INVALID
+ ? other.window_funnel_mode
+ : window_funnel_mode;
+ } else {
+ window_funnel_mode = WindowFunnelMode::DEFAULT;
+ }
+ }
+
+ void write(BufferWritable& out) const {
+ write_var_int(event_count, out);
+ write_var_int(window, out);
+ if (enable_mode) {
+
write_var_int(static_cast<std::underlying_type_t<WindowFunnelMode>>(window_funnel_mode),
+ out);
+ }
+ PBlock pblock;
+ size_t uncompressed_bytes = 0;
+ size_t compressed_bytes = 0;
+ Status status;
+ std::string buff;
+ Block block = mutable_block.to_block();
+ status = block.serialize(
+ BeExecVersionManager::get_newest_version(), &pblock,
&uncompressed_bytes,
+ &compressed_bytes,
+ segment_v2::CompressionTypePB::ZSTD); // ZSTD for better
compression ratio
+ if (!status.ok()) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
status.to_string());
+ return;
+ }
+ if (!pblock.SerializeToString(&buff)) {
+ throw doris::Exception(ErrorCode::SERIALIZE_PROTOBUF_ERROR,
+ "Serialize window_funnel data");
+ return;
+ }
+ auto data_bytes = buff.size();
+ write_var_uint(data_bytes, out);
+ out.write(buff.data(), data_bytes);
+ }
+
+ void read(BufferReadable& in) {
+ int64_t event_level;
+ read_var_int(event_level, in);
+ event_count = (int)event_level;
+ read_var_int(window, in);
+ window_funnel_mode = WindowFunnelMode::DEFAULT;
+ if (enable_mode) {
+ int64_t mode;
+ read_var_int(mode, in);
+ window_funnel_mode = static_cast<WindowFunnelMode>(mode);
+ }
+ size_t data_bytes = 0;
+ read_var_uint(data_bytes, in);
+ std::string buff;
+ buff.resize(data_bytes);
+ in.read(buff.data(), data_bytes);
+
+ PBlock pblock;
+ if (!pblock.ParseFromArray(buff.data(), data_bytes)) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+ "Failed to parse window_funnel data to
block");
+ }
+ Block block;
+ auto status = block.deserialize(pblock);
+ if (!status.ok()) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
status.to_string());
+ }
+ mutable_block = MutableBlock(std::move(block));
+ }
+};
+
+template <TypeIndex TYPE_INDEX, typename NativeType>
+class AggregateFunctionWindowFunnel
+ : public IAggregateFunctionDataHelper<
+ WindowFunnelState<TYPE_INDEX, NativeType>,
+ AggregateFunctionWindowFunnel<TYPE_INDEX, NativeType>> {
+public:
+ AggregateFunctionWindowFunnel(const DataTypes& argument_types_)
+ : IAggregateFunctionDataHelper<WindowFunnelState<TYPE_INDEX,
NativeType>,
+
AggregateFunctionWindowFunnel<TYPE_INDEX, NativeType>>(
+ argument_types_) {}
+
+ void create(AggregateDataPtr __restrict place) const override {
+ auto data = new (place) WindowFunnelState<TYPE_INDEX, NativeType>(
+ IAggregateFunction::get_argument_types().size() - 3);
+ /// support window funnel mode from 2.0. See
`BeExecVersionManager::max_be_exec_version`
+ data->enable_mode = version >= 3;
+ }
+
+ String get_name() const override { return "window_funnel"; }
+
+ DataTypePtr get_return_type() const override { return
std::make_shared<DataTypeInt32>(); }
+
+ void reset(AggregateDataPtr __restrict place) const override {
this->data(place).reset(); }
+
+ void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
+ Arena*) const override {
+ const auto& window =
+ assert_cast<const
ColumnVector<Int64>&>(*columns[0]).get_data()[row_num];
+ StringRef mode = columns[1]->get_data_at(row_num);
+ this->data(place).add(columns, row_num, window,
+ string_to_window_funnel_mode(mode.to_string()));
+ }
+
+ void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+ Arena*) const override {
+ this->data(place).merge(this->data(rhs));
+ }
+
+ void serialize(ConstAggregateDataPtr __restrict place, BufferWritable&
buf) const override {
+ this->data(place).write(buf);
+ }
+
+ void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+ Arena*) const override {
+ this->data(place).read(buf);
+ }
+
+ void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
+ this->data(const_cast<AggregateDataPtr>(place)).sort();
+ assert_cast<ColumnInt32&>(to).get_data().push_back(
+ IAggregateFunctionDataHelper<
+ WindowFunnelState<TYPE_INDEX, NativeType>,
+ AggregateFunctionWindowFunnel<TYPE_INDEX,
NativeType>>::data(place)
+ .get());
+ }
+
+protected:
+ using IAggregateFunction::version;
+};
+
+template <typename DateValueType, typename NativeType>
+struct WindowFunnelStateOld {
std::vector<std::pair<DateValueType, int>> events;
int max_event_level;
bool sorted;
@@ -81,7 +435,7 @@ struct WindowFunnelState {
WindowFunnelMode window_funnel_mode;
bool enable_mode;
- WindowFunnelState() {
+ WindowFunnelStateOld() {
sorted = true;
max_event_level = 0;
window = 0;
@@ -185,7 +539,7 @@ struct WindowFunnelState {
return 0;
}
- void merge(const WindowFunnelState& other) {
+ void merge(const WindowFunnelStateOld& other) {
if (other.events.empty()) {
return;
}
@@ -258,18 +612,19 @@ struct WindowFunnelState {
};
template <typename DateValueType, typename NativeType>
-class AggregateFunctionWindowFunnel
+class AggregateFunctionWindowFunnelOld
: public IAggregateFunctionDataHelper<
- WindowFunnelState<DateValueType, NativeType>,
- AggregateFunctionWindowFunnel<DateValueType, NativeType>> {
+ WindowFunnelStateOld<DateValueType, NativeType>,
+ AggregateFunctionWindowFunnelOld<DateValueType, NativeType>>
{
public:
- AggregateFunctionWindowFunnel(const DataTypes& argument_types_)
+ AggregateFunctionWindowFunnelOld(const DataTypes& argument_types_)
: IAggregateFunctionDataHelper<
- WindowFunnelState<DateValueType, NativeType>,
- AggregateFunctionWindowFunnel<DateValueType,
NativeType>>(argument_types_) {}
+ WindowFunnelStateOld<DateValueType, NativeType>,
+ AggregateFunctionWindowFunnelOld<DateValueType,
NativeType>>(
+ argument_types_) {}
void create(AggregateDataPtr __restrict place) const override {
- auto data = new (place) WindowFunnelState<DateValueType, NativeType>();
+ auto data = new (place) WindowFunnelStateOld<DateValueType,
NativeType>();
/// support window funnel mode from 2.0. See
`BeExecVersionManager::max_be_exec_version`
data->enable_mode = version >= 3;
}
@@ -318,8 +673,8 @@ public:
this->data(const_cast<AggregateDataPtr>(place)).sort();
assert_cast<ColumnInt32&>(to).get_data().push_back(
IAggregateFunctionDataHelper<
- WindowFunnelState<DateValueType, NativeType>,
- AggregateFunctionWindowFunnel<DateValueType,
NativeType>>::data(place)
+ WindowFunnelStateOld<DateValueType, NativeType>,
+ AggregateFunctionWindowFunnelOld<DateValueType,
NativeType>>::data(place)
.get());
}
diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
index bf5eacdadfa..5c4236814b3 100644
--- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
+++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
@@ -146,11 +146,15 @@ TEST_F(VWindowFunnelTest, testSerialize) {
for (int i = 0; i < NUM_CONDS; i++) {
agg_function->add(place, column, i, nullptr);
}
+ ColumnVector<Int32> column_result;
+ agg_function->insert_result_into(place, column_result);
+ EXPECT_EQ(column_result.get_data()[0], 3);
ColumnString buf;
VectorBufferWriter buf_writer(buf);
agg_function->serialize(place, buf_writer);
buf_writer.commit();
+ agg_function->destroy(place);
std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
AggregateDataPtr place2 = memory2.get();
@@ -159,11 +163,6 @@ TEST_F(VWindowFunnelTest, testSerialize) {
VectorBufferReader buf_reader(buf.get_data_at(0));
agg_function->deserialize(place2, buf_reader, nullptr);
- ColumnVector<Int32> column_result;
- agg_function->insert_result_into(place, column_result);
- EXPECT_EQ(column_result.get_data()[0], 3);
- agg_function->destroy(place);
-
ColumnVector<Int32> column_result2;
agg_function->insert_result_into(place2, column_result2);
EXPECT_EQ(column_result2.get_data()[0], 3);
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 406395efa38..602e5dffdb2 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1829,7 +1829,7 @@ public class Config extends ConfigBase {
* Max data version of backends serialize block.
*/
@ConfField(mutable = false)
- public static int max_be_exec_version = 4;
+ public static int max_be_exec_version = 5;
/**
* Min data version of backends serialize block.
diff --git a/regression-test/data/nereids_p0/aggregate/window_funnel.out
b/regression-test/data/nereids_p0/aggregate/window_funnel.out
index 3396dd90e82..f16f050c8e6 100644
--- a/regression-test/data/nereids_p0/aggregate/window_funnel.out
+++ b/regression-test/data/nereids_p0/aggregate/window_funnel.out
@@ -26,3 +26,102 @@
-- !window_funnel_increase --
2
+-- !window_funnel_neq --
+2
+
+-- !window_funnel_default0 --
+100123 4
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_default1 --
+100123 3
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_default2 --
+100123 1
+100125 1
+100126 1
+100127 1
+
+-- !window_funnel_default3 --
+100123 1
+100125 1
+100126 1
+100127 1
+
+-- !window_funnel_default4 --
+100123 2
+100125 2
+100126 2
+100127 2
+
+-- !window_funnel_default5 --
+100123 1
+100125 1
+100126 1
+100127 1
+
+-- !window_funnel_default6 --
+100123 4
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_default7 --
+100123 2
+100125 2
+100126 1
+100127 1
+
+-- !window_funnel_default8 --
+100123 4
+100125 2
+100126 0
+100127 1
+
+-- !window_funnel_default9 --
+100123 4
+100125 3
+100126 4
+100127 2
+
+-- !window_funnel_deduplication0 --
+100123 3
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_deduplication1 --
+100123 3
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_deduplication2 --
+100123 3
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_fixed0 --
+100123 2
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_fixed1 --
+100123 2
+100125 3
+100126 2
+100127 2
+
+-- !window_funnel_increase0 --
+100123 3
+100125 3
+100126 2
+100127 2
+
diff --git
a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out
b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out
index 7d7a2488245..43948dc7951 100644
---
a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out
+++
b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out
@@ -77,3 +77,8 @@
-- !window_funnel_25 --
1
+-- !window_funnel_26 --
+users 13
+browser 10
+buy 1
+
diff --git a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy
b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy
index 02562c49f48..5e4eeba7c14 100644
--- a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy
+++ b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy
@@ -294,4 +294,365 @@ suite("window_funnel") {
from ${tableName} t;
"""
sql """ DROP TABLE IF EXISTS ${tableName} """
+
+ sql """
+ CREATE TABLE windowfunnel_test (
+ `xwho` varchar(50) NULL COMMENT 'xwho',
+ `xwhen` datetime COMMENT 'xwhen',
+ `xwhat` int NULL COMMENT 'xwhat'
+ )
+ DUPLICATE KEY(xwho)
+ DISTRIBUTED BY HASH(xwho) BUCKETS 3
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """
+ INSERT into windowfunnel_test (xwho, xwhen, xwhat) values ('1',
'2022-03-12 10:41:00', 1),
+ ('1', '2022-03-12
13:28:02', 2),
+ ('1', '2022-03-12
16:15:01', 3),
+ ('1', '2022-03-12
19:05:04', 4);
+ """
+ qt_window_funnel_neq """
+ select window_funnel(3600 * 24, 'default', t.xwhen, t.xwhat = 1,
t.xwhat != 2,t.xwhat=3 ) AS level from windowfunnel_test t;
+ """
+
+ sql """ DROP TABLE IF EXISTS windowfunnel_test """
+ sql """
+ CREATE TABLE windowfunnel_test(
+ user_id BIGINT,
+ event_name VARCHAR(64),
+ event_timestamp datetime,
+ phone_brand varchar(64),
+ tab_num int
+ ) distributed by hash(user_id) buckets 3
properties("replication_num"="1");
+ """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ // test default mode
+ qt_window_funnel_default0 """
+ SELECT
+ user_id,
+ window_funnel(3600 * 3, "default", event_timestamp, event_name =
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ // in 5 minutes
+ qt_window_funnel_default1 """
+ SELECT
+ user_id,
+ window_funnel(300, "default", event_timestamp, event_name = '登录',
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ // in 30 seconds
+ qt_window_funnel_default2 """
+ SELECT
+ user_id,
+ window_funnel(30, "default", event_timestamp, event_name = '登录',
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ qt_window_funnel_default3 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp, event_name =
'登录', event_name = '登录',event_name = '访问', event_name = '下单', event_name =
'付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ qt_window_funnel_default4 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp, event_name =
'登录', event_name = '访问',event_name = '访问', event_name = '下单', event_name =
'付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ qt_window_funnel_default5 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp, event_name =
'登录', event_name = '登录', event_name = '登录', event_name = '登录', event_name =
'登录',event_name = '登录', event_name = '登录') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ // complicate expressions
+ qt_window_funnel_default6 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp, event_name =
'登录', event_name != '登陆', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id;
+ """
+ qt_window_funnel_default7 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp, event_name =
'登录', event_name != '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id;
+ """
+ qt_window_funnel_default8 """
+ SELECT
+ user_id,
+ window_funnel(3600000000, "default", event_timestamp,
+ event_name = '登录' AND phone_brand in ('HONOR',
'XIAOMI', 'VIVO') AND tab_num not in (4, 5),
+ event_name = '访问' AND tab_num not in (4, 5),
+ event_name = '下单' AND tab_num not in (6, 7),
+ event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id;
+ """
+
+ sql """ DROP TABLE IF EXISTS windowfunnel_test """
+ sql """
+ CREATE TABLE windowfunnel_test(
+ user_id BIGINT,
+ event_name VARCHAR(64),
+ event_timestamp datetime,
+ phone_brand varchar(64),
+ tab_num int
+ ) distributed by hash(user_id) buckets 3
properties("replication_num"="1");
+ """
+ // test multiple matched event list, output the longest match
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5),
+ (100123, '登录', '2022-05-14 13:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 13:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 13:04:00', "HONOR", 3),
+ (100123, '付款', '2022-05-14 13:10:00', 'HONOR', 4),
+ (100126, '登录', '2022-05-15 14:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 14:01:00', 'HONOR', 2),
+ (100126, '下单', '2022-05-15 14:02:00', 'HONOR', 3),
+ (100126, '付款', '2022-05-15 14:03:00', 'HONOR', 4);
+ """
+ qt_window_funnel_default9 """
+ SELECT
+ user_id,
+ window_funnel(3600, "default", event_timestamp, event_name = '登录',
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+
+ // test deduplication mode
+ sql """ DROP TABLE IF EXISTS windowfunnel_test """
+ sql """
+ CREATE TABLE windowfunnel_test(
+ user_id BIGINT,
+ event_name VARCHAR(64),
+ event_timestamp datetime,
+ phone_brand varchar(64),
+ tab_num int
+ ) distributed by hash(user_id) buckets 3
properties("replication_num"="1");
+ """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '登录', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_deduplication0 """
+ SELECT
+ user_id,
+ window_funnel(3600, "deduplication", event_timestamp, event_name =
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ sql """ truncate table windowfunnel_test; """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '访问', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_deduplication1 """
+ SELECT
+ user_id,
+ window_funnel(3600, "deduplication", event_timestamp, event_name =
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ sql """ truncate table windowfunnel_test; """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '下单', '2022-05-14 10:04:00', 'HONOR', 3),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_deduplication2 """
+ SELECT
+ user_id,
+ window_funnel(3600, "deduplication", event_timestamp, event_name =
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+
+
+ // test fixed mode
+ sql """ truncate table windowfunnel_test; """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '登录', '2022-05-14 10:03:00', 'HONOR', 3),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_fixed0 """
+ SELECT
+ user_id,
+ window_funnel(3600, "fixed", event_timestamp, event_name = '登录',
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+ sql """ DROP TABLE IF EXISTS windowfunnel_test """
+ sql """
+ CREATE TABLE windowfunnel_test(
+ user_id BIGINT,
+ event_name VARCHAR(64),
+ event_timestamp datetime,
+ phone_brand varchar(64),
+ tab_num int
+ ) distributed by hash(user_id) buckets 3
properties("replication_num"="1");
+ """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '登录2', '2022-05-14 10:03:00', 'HONOR', 3),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_fixed1 """
+ SELECT
+ user_id,
+ window_funnel(3600, "fixed", event_timestamp, event_name = '登录',
event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+
+ // test increase mode
+ sql """ DROP TABLE IF EXISTS windowfunnel_test """
+ sql """
+ CREATE TABLE windowfunnel_test(
+ user_id BIGINT,
+ event_name VARCHAR(64),
+ event_timestamp datetime,
+ phone_brand varchar(64),
+ tab_num int
+ ) distributed by hash(user_id) buckets 3
properties("replication_num"="1");
+ """
+ sql """
+ INSERT INTO windowfunnel_test VALUES
+ (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+ (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+ (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+ (100123, '付款', '2022-05-14 10:04:00', 'HONOR', 4),
+ (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+ (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+ (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+ (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+ (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+ (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+ (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+ """
+ qt_window_funnel_increase0 """
+ SELECT
+ user_id,
+ window_funnel(3600, "increase", event_timestamp, event_name =
'登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level
+ FROM windowfunnel_test
+ GROUP BY user_id
+ order BY user_id
+ """
+
}
diff --git
a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql
b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql
index 850be3e4f4b..00fc65c1cc8 100644
---
a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql
+++
b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql
@@ -53,41 +53,40 @@ insert into user_analysis values
(1000012,'browse','2022-07-28 00:00:00');
insert into user_analysis values (1000013,'browse','2022-07-29 00:00:00');
insert into user_analysis values (1000014,'browse','2022-07-30 00:00:00');
insert into user_analysis values (1000015,'browse','2022-07-31 00:00:00');
---- Nereids does't support window function
---- WITH
---- level_detail AS (
---- SELECT
---- level
---- ,COUNT(1) AS count_user
---- FROM (
---- SELECT
---- user_id
---- ,window_funnel(
---- 1800
---- ,'default'
---- ,event_time
---- ,event_type = 'browse'
---- ,event_type = 'favorite'
---- ,event_type = 'shopping cart'
---- ,event_type = 'buy'
---- ) AS level
---- FROM user_analysis
---- WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00'
---- AND event_time < TIMESTAMP '2022-07-31 00:00:00'
---- GROUP BY user_id
---- ) AS basic_table
---- GROUP BY level
---- ORDER BY level ASC )
---- SELECT CASE level WHEN 0 THEN 'users'
---- WHEN 1 THEN 'browser'
---- WHEN 2 THEN 'favorite'
---- WHEN 3 THEN 'shopping cart'
---- WHEN 4 THEN 'buy'
---- END
---- ,SUM(count_user) over ( ORDER BY level DESC )
---- FROM level_detail
---- GROUP BY level
---- ,count_user
---- ORDER BY level ASC;
+WITH
+ level_detail AS (
+ SELECT
+ level
+ ,COUNT(1) AS count_user
+ FROM (
+ SELECT
+ user_id
+ ,window_funnel(
+ 1800
+ ,'default'
+ ,event_time
+ ,event_type = 'browse'
+ ,event_type = 'favorite'
+ ,event_type = 'shopping cart'
+ ,event_type = 'buy'
+ ) AS level
+ FROM user_analysis
+ WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00'
+ AND event_time < TIMESTAMP '2022-07-31 00:00:00'
+ GROUP BY user_id
+ ) AS basic_table
+ GROUP BY level
+ ORDER BY level ASC )
+SELECT CASE level WHEN 0 THEN 'users'
+ WHEN 1 THEN 'browser'
+ WHEN 2 THEN 'favorite'
+ WHEN 3 THEN 'shopping cart'
+ WHEN 4 THEN 'buy'
+ END
+ ,SUM(count_user) over ( ORDER BY level DESC )
+FROM level_detail
+GROUP BY level
+ ,count_user
+ORDER BY level ASC;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]