This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1a8a1e5b16 [Feature](count_by_enum) support count_by_enum function
(#22071)
1a8a1e5b16 is described below
commit 1a8a1e5b16acf10d7763201a42a6ba813ee9a2a9
Author: czzmmc <[email protected]>
AuthorDate: Sun Aug 6 16:05:14 2023 +0800
[Feature](count_by_enum) support count_by_enum function (#22071)
count_by_enum(expr1, expr2, ... , exprN);
Treats the data in a column as an enumeration and counts the number of
values in each enumeration. Returns the number of enumerated values for each
column, and the number of non-null values versus the number of null values.
---
.../aggregate_function_count_by_enum.cpp | 61 +++++
.../aggregate_function_count_by_enum.h | 205 ++++++++++++++
.../aggregate_function_simple_factory.cpp | 2 +
be/src/vec/utils/count_by_enum_helpers.hpp | 67 +++++
.../aggregate_functions/vec_count_by_enum_test.cpp | 298 +++++++++++++++++++++
.../aggregate-functions/count_by_enum.md | 152 +++++++++++
docs/sidebars.json | 3 +-
.../aggregate-functions/count_by_enum.md | 151 +++++++++++
.../doris/catalog/BuiltinAggregateFunctions.java | 2 +
.../java/org/apache/doris/catalog/FunctionSet.java | 16 ++
.../expressions/functions/agg/CountByEnum.java | 63 +++++
.../visitor/AggregateFunctionVisitor.java | 5 +
.../org/apache/doris/analysis/AggregateTest.java | 42 +++
.../test_aggregate_count_by_enum.out | 76 ++++++
.../test_aggregate_count_by_enum.groovy | 74 +++++
15 files changed, 1216 insertions(+), 1 deletion(-)
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp
b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp
new file mode 100644
index 0000000000..1a0bf25182
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_count_by_enum.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/// Factory callback that builds the `count_by_enum` aggregate function.
+///
+/// Every argument must be a string type, optionally wrapped in Nullable.
+/// AggregateFunctionCountByEnum::add() casts each input column to ColumnString
+/// without a runtime check, so a non-string argument in any position has to be
+/// rejected here rather than only the first one.
+///
+/// @param name               function name; used only in the warning messages
+/// @param argument_types     types of the call arguments (at least one)
+/// @param result_is_nullable not consulted by this factory
+/// @return the aggregate function, or nullptr (after logging a warning) when the
+///         argument list is empty or contains a non-string type
+AggregateFunctionPtr create_aggregate_function_count_by_enum(const std::string& name,
+                                                             const DataTypes& argument_types,
+                                                             const bool result_is_nullable) {
+    if (argument_types.empty()) {
+        LOG(WARNING) << fmt::format("Illegal number {} of argument for aggregate function {}",
+                                    argument_types.size(), name);
+        return nullptr;
+    }
+
+    for (const auto& argument_type : argument_types) {
+        const auto* type = argument_type.get();
+        if (type->is_nullable()) {
+            // Look through Nullable(T) and validate T itself.
+            type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get();
+        }
+
+        const WhichDataType which(*type);
+        if (!which.is_string()) {
+            LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}",
+                                        argument_type->get_name(), name);
+            return nullptr;
+        }
+    }
+
+    return std::make_shared<AggregateFunctionCountByEnum<AggregateFunctionCountByEnumData>>(
+            argument_types);
+}
+
+// Publishes `count_by_enum` in the aggregate function factory. The same creator
+// is registered twice: first with the trailing flag set, then with it cleared.
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& factory) {
+    for (const bool flag : {true, false}) {
+        factory.register_function("count_by_enum", create_aggregate_function_count_by_enum, flag);
+    }
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h
b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h
new file mode 100644
index 0000000000..273fa2a1e4
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+#include "vec/utils/count_by_enum_helpers.hpp"
+
+namespace doris::vectorized {
+
+/// Aggregation state for `count_by_enum`: one CountByEnumData entry per argument
+/// column, stored in argument order.
+struct AggregateFunctionCountByEnumData {
+    using MapType = std::unordered_map<std::string, uint64_t>;
+
+    /// Discards the statistics of every column.
+    void reset() { data_vec.clear(); }
+
+    /// Records `number` occurrences of the non-null `value` for column `idx`.
+    void add(int idx, const StringRef& value, const UInt64& number = 1) {
+        CountByEnumData& data = slot(idx);
+        // operator[] value-initializes a missing counter to 0, so one lookup
+        // handles both a new key and an existing key.
+        data.cbe[value.to_string()] += number;
+        data.not_null += number;
+        data.all += number;
+    }
+
+    /// Records `number` null rows for column `idx`.
+    void add(int idx, const UInt64& number = 1) {
+        CountByEnumData& data = slot(idx);
+        data.null += number;
+        data.all += number;
+    }
+
+    /// Adds the per-column statistics of `rhs` to this state, column by column.
+    void merge(const AggregateFunctionCountByEnumData& rhs) {
+        if (data_vec.size() < rhs.data_vec.size()) {
+            data_vec.resize(rhs.data_vec.size());
+        }
+
+        for (size_t idx = 0; idx < rhs.data_vec.size(); ++idx) {
+            const CountByEnumData& src = rhs.data_vec[idx];
+            CountByEnumData& dst = data_vec[idx];
+
+            // Iterate by reference: copying each entry would duplicate its key string.
+            for (const auto& [key, count] : src.cbe) {
+                dst.cbe[key] += count;
+            }
+
+            dst.not_null += src.not_null;
+            dst.null += src.null;
+            dst.all += src.all;
+        }
+    }
+
+    /// Serializes the state. Layout: column count, then for each column the map
+    /// size, the (count, key) pairs, and the not_null / null / all counters.
+    void write(BufferWritable& buf) const {
+        write_binary(data_vec.size(), buf);
+
+        for (const auto& data : data_vec) {
+            const MapType& unordered_map = data.cbe;
+            write_binary(unordered_map.size(), buf);
+
+            for (const auto& [key, value] : unordered_map) {
+                write_binary(value, buf);
+                write_binary(key, buf);
+            }
+
+            write_binary(data.not_null, buf);
+            write_binary(data.null, buf);
+            write_binary(data.all, buf);
+        }
+    }
+
+    /// Replaces the state with one deserialized from `buf` (the layout produced
+    /// by write()).
+    void read(BufferReadable& buf) {
+        data_vec.clear();
+
+        uint64_t vec_size_number = 0;
+        read_binary(vec_size_number, buf);
+
+        // Counters use the same unsigned 64-bit type as the sizes they are
+        // compared against, so large sizes cannot overflow a signed index.
+        for (uint64_t idx = 0; idx < vec_size_number; ++idx) {
+            uint64_t element_number = 0;
+            read_binary(element_number, buf);
+
+            CountByEnumData data;
+            data.cbe.reserve(element_number);
+            for (uint64_t i = 0; i < element_number; ++i) {
+                std::string key;
+                uint64_t value = 0;
+                read_binary(value, buf);
+                read_binary(key, buf);
+                data.cbe.emplace(std::move(key), value);
+            }
+
+            read_binary(data.not_null, buf);
+            read_binary(data.null, buf);
+            read_binary(data.all, buf);
+            data_vec.emplace_back(std::move(data));
+        }
+    }
+
+    /// Renders the state as the JSON array string returned to the user.
+    std::string get() const {
+        rapidjson::StringBuffer buffer;
+        build_json_from_vec(buffer, data_vec);
+        // The buffer knows its length; no need to rescan for the terminator.
+        return std::string(buffer.GetString(), buffer.GetSize());
+    }
+
+private:
+    /// Returns the entry for column `idx`, growing the vector as far as needed so
+    /// that the index is always valid (new entries are value-initialized).
+    CountByEnumData& slot(int idx) {
+        const auto pos = static_cast<size_t>(idx);
+        if (data_vec.size() <= pos) {
+            data_vec.resize(pos + 1);
+        }
+        return data_vec[pos];
+    }
+
+    std::vector<CountByEnumData> data_vec;
+};
+
+/// Aggregate function `count_by_enum(expr1, ..., exprN)`.
+///
+/// Each argument column is read as a string column (optionally nullable). The
+/// per-column value counts and null / non-null totals are accumulated in `Data`
+/// and the result is emitted as a JSON array string.
+template <typename Data>
+class AggregateFunctionCountByEnum final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionCountByEnum<Data>> {
+public:
+    AggregateFunctionCountByEnum() = default;
+    AggregateFunctionCountByEnum(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionCountByEnum<Data>>(
+                      argument_types_),
+              arg_count(argument_types_.size()) {}
+
+    std::string get_name() const override { return "count_by_enum"; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeString>(); }
+
+    /// Feeds row `row_num` of every argument column into the state at `place`.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        for (size_t i = 0; i < arg_count; ++i) {
+            const int idx = static_cast<int>(i);
+            const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[i]);
+            if (nullable_column == nullptr) {
+                // Plain (non-nullable) string column.
+                this->data(place).add(
+                        idx, static_cast<const ColumnString&>(*columns[i]).get_data_at(row_num));
+            } else if (nullable_column->is_null_at(row_num)) {
+                // TODO create a null vector
+                this->data(place).add(idx);
+            } else {
+                this->data(place).add(
+                        idx, static_cast<const ColumnString&>(nullable_column->get_nested_column())
+                                     .get_data_at(row_num));
+            }
+        }
+    }
+
+    void reset(AggregateDataPtr place) const override { this->data(place).reset(); }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena* arena) const override {
+        this->data(place).merge(this->data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    /// Appends the JSON rendering of the state at `place` to the string column `to`.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const std::string json_arr = this->data(place).get();
+        assert_cast<ColumnString&>(to).insert_data(json_arr.data(), json_arr.length());
+    }
+
+private:
+    // Number of argument columns. Initialized so that a default-constructed
+    // instance iterates over zero columns instead of an indeterminate count.
+    size_t arg_count = 0;
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
index 9ff47a6f57..f541f99b0a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -35,6 +35,7 @@ void
register_aggregate_function_min_by(AggregateFunctionSimpleFactory& factory)
void register_aggregate_function_max_by(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory);
void register_aggregate_function_count(AggregateFunctionSimpleFactory&
factory);
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_HLL_union_agg(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory);
void register_aggregate_function_bit(AggregateFunctionSimpleFactory& factory);
@@ -71,6 +72,7 @@ AggregateFunctionSimpleFactory&
AggregateFunctionSimpleFactory::instance() {
register_aggregate_function_max_by(instance);
register_aggregate_function_avg(instance);
register_aggregate_function_count(instance);
+ register_aggregate_function_count_by_enum(instance);
register_aggregate_function_count_old(instance);
register_aggregate_function_sum_old(instance);
register_aggregate_function_uniq(instance);
diff --git a/be/src/vec/utils/count_by_enum_helpers.hpp
b/be/src/vec/utils/count_by_enum_helpers.hpp
new file mode 100644
index 0000000000..20c38b765b
--- /dev/null
+++ b/be/src/vec/utils/count_by_enum_helpers.hpp
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <rapidjson/document.h>
+#include <rapidjson/prettywriter.h>
+#include <rapidjson/stringbuffer.h>
+
+#include <boost/dynamic_bitset.hpp>
+
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Statistics that `count_by_enum` keeps for a single argument column.
+struct CountByEnumData {
+    /// Occurrence count keyed by value (serialized under "cbe").
+    std::unordered_map<std::string, uint64_t> cbe;
+    // The counters start at zero so that a default-initialized instance is
+    // well-defined, not just a value-initialized one.
+    /// Serialized under "notnull".
+    uint64_t not_null = 0;
+    /// Serialized under "null".
+    uint64_t null = 0;
+    /// Serialized under "all".
+    uint64_t all = 0;
+};
+
+/// Serializes `data_vec` into `buffer` as a JSON array with one object per
+/// element, in vector order:
+///   {"cbe": {<value>: <count>, ...}, "notnull": N, "null": N, "all": N}
+///
+/// Declared `inline` because this definition lives in a header; a non-inline
+/// definition would be emitted by every translation unit that includes it.
+inline void build_json_from_vec(rapidjson::StringBuffer& buffer,
+                                const std::vector<CountByEnumData>& data_vec) {
+    rapidjson::Document doc;
+    doc.SetArray();
+    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
+
+    for (const CountByEnumData& data : data_vec) {
+        rapidjson::Value obj_cbe(rapidjson::kObjectType);
+        // Iterate the map in place; copying it (or its entries) is unnecessary.
+        for (const auto& [key, count] : data.cbe) {
+            // Pass the length explicitly so a key containing an embedded NUL is
+            // copied in full instead of being cut at the first NUL.
+            rapidjson::Value key_cbe(key.data(), static_cast<rapidjson::SizeType>(key.size()),
+                                     allocator);
+            rapidjson::Value value_cbe(count);
+            obj_cbe.AddMember(key_cbe, value_cbe, allocator);
+        }
+
+        rapidjson::Value obj(rapidjson::kObjectType);
+        obj.AddMember("cbe", obj_cbe, allocator);
+        obj.AddMember("notnull", data.not_null, allocator);
+        obj.AddMember("null", data.null, allocator);
+        obj.AddMember("all", data.all, allocator);
+
+        doc.PushBack(obj, allocator);
+    }
+
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    doc.Accept(writer);
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp
b/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp
new file mode 100644
index 0000000000..fa953b5101
--- /dev/null
+++ b/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp
@@ -0,0 +1,298 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <rapidjson/document.h>
+
+#include "common/logging.h"
+#include "gtest/gtest.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+
+namespace doris::vectorized {
+
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory&
factory);
+
+// Fixture that looks up `count_by_enum` for a single Nullable(String) argument
+// before every test.
+class VCountByEnumTest : public testing::Test {
+public:
+    AggregateFunctionPtr agg_function;
+
+    VCountByEnumTest() {}
+
+    void SetUp() override {
+        // instance() returns a reference to the shared factory; bind to it
+        // instead of copying the whole registry.
+        AggregateFunctionSimpleFactory& factory = AggregateFunctionSimpleFactory::instance();
+        DataTypes data_types = {
+                std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+        };
+        agg_function = factory.get("count_by_enum", data_types, true);
+        // Fatal on failure: every test dereferences agg_function immediately.
+        ASSERT_NE(agg_function, nullptr);
+    }
+
+    void TearDown() override {}
+};
+
+// An aggregation state that never received a row must survive a
+// serialize/deserialize round trip and a merge, and must render as "[]".
+TEST_F(VCountByEnumTest, testEmpty) {
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+
+ // Serialize the empty state and read it back into the same place.
+ ColumnString buf;
+ VectorBufferWriter buf_writer(buf);
+ agg_function->serialize(place, buf_writer);
+ buf_writer.commit();
+ LOG(INFO) << "buf size : " << buf.size();
+ VectorBufferReader buf_reader(buf.get_data_at(0));
+ agg_function->deserialize(place, buf_reader, nullptr);
+
+ // A second, untouched state to merge into the first one.
+ std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place2 = memory2.get();
+ agg_function->create(place2);
+
+ agg_function->merge(place, place2, nullptr);
+ auto column_result =
((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+ agg_function->insert_result_into(place, *column_result);
+ auto& result = assert_cast<ColumnString&>(*column_result);
+ LOG(INFO) << "result : " << result.get_data_at(0);
+ EXPECT_EQ(result.get_data_at(0).to_string(), "[]");
+
+ // The merge source is expected to render as an empty array as well.
+ auto column_result2 =
((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+ agg_function->insert_result_into(place2, *column_result2);
+ auto& result2 = assert_cast<ColumnString&>(*column_result2);
+ LOG(INFO) << "result2 : " << result2.get_data_at(0);
+ EXPECT_EQ(result2.get_data_at(0).to_string(), "[]");
+
+ agg_function->destroy(place);
+ agg_function->destroy(place2);
+}
+
+// Feeds a plain (non-nullable) string column with 3 x "F" and 2 x "M", merges the
+// state into an empty one and checks the counts of the merged result.
+TEST_F(VCountByEnumTest, testNotNullableSample) {
+ const int batch_size = 5;
+ auto column_f1 = ColumnString::create();
+ column_f1->insert("F");
+ column_f1->insert("F");
+ column_f1->insert("M");
+ column_f1->insert("F");
+ column_f1->insert("M");
+
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+ const IColumn* column[1] = {column_f1.get()};
+ // Add the rows one at a time.
+ for (int i = 0; i < batch_size; i++) {
+ agg_function->add(place, column, i, nullptr);
+ }
+
+ std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place2 = memory2.get();
+ agg_function->create(place2);
+
+ // place2 is empty, so after the merge it should hold exactly place's counts.
+ agg_function->merge(place2, place, nullptr);
+
+ auto column_result2 =
((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+ agg_function->insert_result_into(place2, *column_result2);
+ auto& result2 = assert_cast<ColumnString&>(*column_result2);
+
+ // The result is a JSON array; element 0 describes the only argument column.
+ rapidjson::Document document;
+ document.Parse(result2.get_data_at(0).to_string().c_str());
+ const rapidjson::Value& item0 = document[0];
+ EXPECT_EQ(item0["cbe"]["M"].GetInt(), 2);
+ EXPECT_EQ(item0["cbe"]["F"].GetInt(), 3);
+ EXPECT_EQ(item0["notnull"].GetInt(), 5);
+ EXPECT_EQ(item0["null"].GetInt(), 0);
+ EXPECT_EQ(item0["all"].GetInt(), 5);
+
+ agg_function->destroy(place);
+ agg_function->destroy(place2);
+}
+
+// Feeds a nullable string column whose first three rows are "F", "F", "M" and
+// whose last two rows are null, merges the state into an empty one and checks
+// the value counts together with the null / non-null totals.
+TEST_F(VCountByEnumTest, testNullableSample) {
+ const int batch_size = 5;
+ // NOTE(review): the nested column holds 3 rows while the null map below holds
+ // 5; rows 3 and 4 are flagged null - confirm ColumnNullable tolerates the
+ // size mismatch.
+ auto column_f1 = ColumnString::create();
+ column_f1->insert("F");
+ column_f1->insert("F");
+ column_f1->insert("M");
+ ColumnPtr column_f1_ptr = std::move(column_f1);
+ auto null_map = ColumnVector<uint8_t>::create();
+ // Despite the name, these are per-row null flags (1 = null), not offsets.
+ std::vector<uint8_t> offs = {0, 0, 0, 1, 1};
+ for (int i = 0; i < offs.size(); ++i) {
+ null_map->insert(offs[i]);
+ }
+
+ auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr,
std::move(null_map));
+
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+ const IColumn* column[1] = {nullable_column_f1.get()};
+ for (int i = 0; i < batch_size; i++) {
+ agg_function->add(place, column, i, nullptr);
+ }
+
+ std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place2 = memory2.get();
+ agg_function->create(place2);
+
+ // place2 is empty, so after the merge it should hold exactly place's counts.
+ agg_function->merge(place2, place, nullptr);
+
+ auto column_result2 =
((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+ agg_function->insert_result_into(place2, *column_result2);
+ auto& result2 = assert_cast<ColumnString&>(*column_result2);
+
+ // The result is a JSON array; element 0 describes the only argument column.
+ rapidjson::Document document;
+ document.Parse(result2.get_data_at(0).to_string().c_str());
+ const rapidjson::Value& item0 = document[0];
+ EXPECT_EQ(item0["cbe"]["M"].GetInt(), 1);
+ EXPECT_EQ(item0["cbe"]["F"].GetInt(), 2);
+ EXPECT_EQ(item0["notnull"].GetInt(), 3);
+ EXPECT_EQ(item0["null"].GetInt(), 2);
+ EXPECT_EQ(item0["all"].GetInt(), 5);
+
+ agg_function->destroy(place);
+ agg_function->destroy(place2);
+}
+
+// Same input as testNullableSample, but the result is read straight from the
+// state the rows were added to, without any merge step.
+TEST_F(VCountByEnumTest, testNoMerge) {
+ const int batch_size = 5;
+ // NOTE(review): the nested column holds 3 rows while the null map below holds
+ // 5; rows 3 and 4 are flagged null - confirm ColumnNullable tolerates the
+ // size mismatch.
+ auto column_f1 = ColumnString::create();
+ column_f1->insert("F");
+ column_f1->insert("F");
+ column_f1->insert("M");
+ ColumnPtr column_f1_ptr = std::move(column_f1);
+ auto null_map = ColumnVector<uint8_t>::create();
+ // Despite the name, these are per-row null flags (1 = null), not offsets.
+ std::vector<uint8_t> offs = {0, 0, 0, 1, 1};
+ for (int i = 0; i < offs.size(); ++i) {
+ null_map->insert(offs[i]);
+ }
+
+ auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr,
std::move(null_map));
+
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+ const IColumn* column[1] = {nullable_column_f1.get()};
+ for (int i = 0; i < batch_size; i++) {
+ agg_function->add(place, column, i, nullptr);
+ }
+
+ auto column_result =
((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+ agg_function->insert_result_into(place, *column_result);
+ auto& result = assert_cast<ColumnString&>(*column_result);
+
+ // The result is a JSON array; element 0 describes the only argument column.
+ rapidjson::Document document;
+ document.Parse(result.get_data_at(0).to_string().c_str());
+ const rapidjson::Value& item0 = document[0];
+ EXPECT_EQ(item0["cbe"]["M"].GetInt(), 1);
+ EXPECT_EQ(item0["cbe"]["F"].GetInt(), 2);
+ EXPECT_EQ(item0["notnull"].GetInt(), 3);
+ EXPECT_EQ(item0["null"].GetInt(), 2);
+ EXPECT_EQ(item0["all"].GetInt(), 5);
+
+ agg_function->destroy(place);
+}
+
+// Round-trips an aggregated state through serialize()/deserialize(), checks the
+// restored state, then merges a second identical batch into it and checks that
+// every count has doubled.
+TEST_F(VCountByEnumTest, testSerialize) {
+    const int batch_size = 5;
+    // NOTE(review): the nested columns hold 3 rows while the null maps hold 5;
+    // rows 3 and 4 are flagged null - confirm ColumnNullable tolerates the size
+    // mismatch.
+    auto column_f1 = ColumnString::create();
+    column_f1->insert("F");
+    column_f1->insert("F");
+    column_f1->insert("M");
+    ColumnPtr column_f1_ptr = std::move(column_f1);
+    auto null_map = ColumnVector<uint8_t>::create();
+    // Despite the name, these are per-row null flags (1 = null), not offsets.
+    std::vector<uint8_t> offs = {0, 0, 0, 1, 1};
+    for (int i = 0; i < offs.size(); ++i) {
+        null_map->insert(offs[i]);
+    }
+    auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr, std::move(null_map));
+
+    std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place = memory.get();
+    agg_function->create(place);
+    const IColumn* column[1] = {nullable_column_f1.get()};
+    for (int i = 0; i < batch_size; i++) {
+        agg_function->add(place, column, i, nullptr);
+    }
+
+    // Serialize the first state, then destroy it: only the bytes survive.
+    ColumnString buf;
+    VectorBufferWriter buf_writer(buf);
+    agg_function->serialize(place, buf_writer);
+    buf_writer.commit();
+    agg_function->destroy(place);
+
+    std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place2 = memory2.get();
+    agg_function->create(place2);
+
+    VectorBufferReader buf_reader(buf.get_data_at(0));
+    agg_function->deserialize(place2, buf_reader, nullptr);
+
+    auto column_result1 = ((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+    agg_function->insert_result_into(place2, *column_result1);
+    auto& result1 = assert_cast<ColumnString&>(*column_result1);
+
+    rapidjson::Document document;
+    document.Parse(result1.get_data_at(0).to_string().c_str());
+    // Fail cleanly on malformed output instead of indexing into a bad document.
+    ASSERT_FALSE(document.HasParseError());
+    ASSERT_TRUE(document.IsArray());
+    const rapidjson::Value& item0 = document[0];
+    EXPECT_EQ(item0["cbe"]["M"].GetInt(), 1);
+    EXPECT_EQ(item0["cbe"]["F"].GetInt(), 2);
+    EXPECT_EQ(item0["notnull"].GetInt(), 3);
+    EXPECT_EQ(item0["null"].GetInt(), 2);
+    EXPECT_EQ(item0["all"].GetInt(), 5);
+
+    // Second batch with the same content, aggregated into its own state.
+    auto column_f1_2 = ColumnString::create();
+    column_f1_2->insert("F");
+    column_f1_2->insert("F");
+    column_f1_2->insert("M");
+    ColumnPtr column_f1_2_ptr = std::move(column_f1_2);
+    auto null_map_2 = ColumnVector<uint8_t>::create();
+    std::vector<uint8_t> offs_2 = {0, 0, 0, 1, 1};
+    // Bound the loop by the vector that is actually indexed.
+    for (int i = 0; i < offs_2.size(); ++i) {
+        null_map_2->insert(offs_2[i]);
+    }
+    auto nullable_column_f1_2 = ColumnNullable::create(column_f1_2_ptr, std::move(null_map_2));
+
+    std::unique_ptr<char[]> memory3(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place3 = memory3.get();
+    agg_function->create(place3);
+    const IColumn* column2[1] = {nullable_column_f1_2.get()};
+    for (int i = 0; i < batch_size; i++) {
+        agg_function->add(place3, column2, i, nullptr);
+    }
+
+    // Merging into the deserialized state must double every count.
+    agg_function->merge(place2, place3, nullptr);
+
+    auto column_result2 = ((DataTypePtr)std::make_shared<DataTypeString>())->create_column();
+    agg_function->insert_result_into(place2, *column_result2);
+    auto& result2 = assert_cast<ColumnString&>(*column_result2);
+
+    rapidjson::Document document2;
+    document2.Parse(result2.get_data_at(0).to_string().c_str());
+    ASSERT_FALSE(document2.HasParseError());
+    ASSERT_TRUE(document2.IsArray());
+    const rapidjson::Value& item0_2 = document2[0];
+    EXPECT_EQ(item0_2["cbe"]["M"].GetInt(), 2);
+    EXPECT_EQ(item0_2["cbe"]["F"].GetInt(), 4);
+    EXPECT_EQ(item0_2["notnull"].GetInt(), 6);
+    EXPECT_EQ(item0_2["null"].GetInt(), 4);
+    EXPECT_EQ(item0_2["all"].GetInt(), 10);
+
+    agg_function->destroy(place2);
+    agg_function->destroy(place3);
+}
+} // namespace doris::vectorized
diff --git
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
new file mode 100644
index 0000000000..379661d0cf
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
@@ -0,0 +1,152 @@
+---
+{
+ "title": "COUNT_BY_ENUM",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## COUNT_BY_ENUM
+
+<version since="1.2.0">
+
+COUNT_BY_ENUM
+
+</version>
+
+### Description
+#### Syntax
+
+`count_by_enum(expr1, expr2, ... , exprN);`
+
+Treats the values in each input column as enumeration values and counts how many times each distinct value occurs. For every input column, it returns these per-value counts together with the number of non-null values, the number of null values, and the total number of values.
+
+#### Arguments
+
+`expr1, expr2, ... , exprN` — One or more columns of type `STRING`. At least one must be specified.
+
+##### Returned value
+
+Returns a JSONArray string.
+
+For example:
+```json
+[{
+ "cbe": {
+ "F": 100,
+ "M": 99
+ },
+ "notnull": 199,
+ "null": 1,
+ "all": 200
+}, {
+ "cbe": {
+ "20": 10,
+ "30": 5,
+ "35": 1
+ },
+ "notnull": 16,
+ "null": 184,
+ "all": 200
+}, {
+ "cbe": {
+ "China": 10,
+ "United States": 9,
+ "England": 20,
+ "Germany": 30
+ },
+ "notnull": 69,
+ "null": 131,
+ "all": 200
+}]
+```
+Description: The return value is a JSON array string and the order of the
internal objects is the order of the input parameters.
+* cbe: the number of occurrences of each distinct non-null value.
+* notnull: number of non-null values.
+* null: number of null values
+* all: total number, including both null and non-null values.
+
+
+### example
+
+```sql
+DROP TABLE IF EXISTS count_by_enum_test;
+
+CREATE TABLE count_by_enum_test(
+ `id` varchar(1024) NULL,
+ `f1` text REPLACE_IF_NOT_NULL NULL,
+ `f2` text REPLACE_IF_NOT_NULL NULL,
+ `f3` text REPLACE_IF_NOT_NULL NULL
+)
+AGGREGATE KEY(`id`)
+DISTRIBUTED BY HASH(id) BUCKETS 3
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+INSERT into count_by_enum_test (id, f1, f2, f3) values
+ (1, "F", "10", "China"),
+ (2, "F", "20", "China"),
+ (3, "M", NULL, "United States"),
+ (4, "M", NULL, "United States"),
+ (5, "M", NULL, "England");
+
+SELECT * from count_by_enum_test;
+
++------+------+------+---------------+
+| id | f1 | f2 | f3 |
++------+------+------+---------------+
+| 1 | F | 10 | China |
+| 2 | F | 20 | China |
+| 3 | M | NULL | United States |
+| 4 | M | NULL | United States |
+| 5 | M | NULL | England |
++------+------+------+---------------+
+
+select count_by_enum(f1) from count_by_enum_test;
+
++------------------------------------------------------+
+| count_by_enum(`f1`) |
++------------------------------------------------------+
+| [{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------+
+
+select count_by_enum(f2) from count_by_enum_test;
+
++--------------------------------------------------------+
+| count_by_enum(`f2`) |
++--------------------------------------------------------+
+| [{"cbe":{"10":1,"20":1},"notnull":2,"null":3,"all":5}] |
++--------------------------------------------------------+
+
+select count_by_enum(f1,f2,f3) from count_by_enum_test;
+
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| count_by_enum(`f1`, `f2`, `f3`)
|
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|
[{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5},{"cbe":{"20":1,"10":1},"notnull":2,"null":3,"all":5},{"cbe":{"England":1,"United
States":2,"China":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+```
+
+### keywords
+
+COUNT_BY_ENUM
diff --git a/docs/sidebars.json b/docs/sidebars.json
index d2de428a60..5318df7b86 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -539,7 +539,8 @@
"sql-manual/sql-functions/aggregate-functions/sequence_match",
"sql-manual/sql-functions/aggregate-functions/sequence_count",
"sql-manual/sql-functions/aggregate-functions/grouping",
-
"sql-manual/sql-functions/aggregate-functions/grouping_id"
+
"sql-manual/sql-functions/aggregate-functions/grouping_id",
+
"sql-manual/sql-functions/aggregate-functions/count_by_enum"
]
},
{
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
new file mode 100644
index 0000000000..873c446345
--- /dev/null
+++
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
@@ -0,0 +1,151 @@
+---
+{
+ "title": "COUNT_BY_ENUM",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## COUNT_BY_ENUM
+
+<version since="1.2.0">
+
+COUNT_BY_ENUM
+
+</version>
+
+### description
+#### Syntax
+
+`count_by_enum(expr1, expr2, ... , exprN);`
+
+将列中数据看作枚举值,统计每个枚举值的个数。返回各个列枚举值的个数,以及非 null 值的个数与 null 值的个数。
+
+#### Arguments
+
+`expr1` — 至少填写一个输入。值为字符串(STRING)类型的列。
+
+#### Returned value
+
+返回一个 JSONArray 字符串。
+
+例如:
+```json
+[{
+ "cbe": {
+ "F": 100,
+ "M": 99
+ },
+ "notnull": 199,
+ "null": 1,
+ "all": 200
+}, {
+ "cbe": {
+ "20": 10,
+ "30": 5,
+ "35": 1
+ },
+ "notnull": 16,
+ "null": 184,
+ "all": 200
+}, {
+ "cbe": {
+ "北京": 10,
+ "上海": 9,
+ "广州": 20,
+ "深圳": 30
+ },
+ "notnull": 69,
+ "null": 131,
+ "all": 200
+}]
+```
+说明:返回值为一个 JSON array 字符串,内部对象的顺序是输入参数的顺序。
+* cbe:根据枚举值统计非 null 值的统计结果
+* notnull:非 null 的个数
+* null:null 值个数
+* all:总数,包括 null 值与非 null 值
+
+### example
+
+```sql
+DROP TABLE IF EXISTS count_by_enum_test;
+
+CREATE TABLE count_by_enum_test(
+ `id` varchar(1024) NULL,
+ `f1` text REPLACE_IF_NOT_NULL NULL,
+ `f2` text REPLACE_IF_NOT_NULL NULL,
+ `f3` text REPLACE_IF_NOT_NULL NULL
+ )
+AGGREGATE KEY(`id`)
+DISTRIBUTED BY HASH(id) BUCKETS 3
+PROPERTIES (
+ "replication_num" = "1"
+);
+
+INSERT into count_by_enum_test (id, f1, f2, f3) values
+ (1, "F", "10", "北京"),
+ (2, "F", "20", "北京"),
+ (3, "M", NULL, "上海"),
+ (4, "M", NULL, "上海"),
+ (5, "M", NULL, "广州");
+
+SELECT * from count_by_enum_test;
+
++------+------+------+--------+
+| id | f1 | f2 | f3 |
++------+------+------+--------+
+| 2 | F | 20 | 北京 |
+| 3 | M | NULL | 上海 |
+| 4 | M | NULL | 上海 |
+| 5 | M | NULL | 广州 |
+| 1 | F | 10 | 北京 |
++------+------+------+--------+
+
+select count_by_enum(f1) from count_by_enum_test;
+
++------------------------------------------------------+
+| count_by_enum(`f1`) |
++------------------------------------------------------+
+| [{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------+
+
+select count_by_enum(f2) from count_by_enum_test;
+
++--------------------------------------------------------+
+| count_by_enum(`f2`) |
++--------------------------------------------------------+
+| [{"cbe":{"10":1,"20":1},"notnull":2,"null":3,"all":5}] |
++--------------------------------------------------------+
+
+select count_by_enum(f1,f2,f3) from count_by_enum_test;
+
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| count_by_enum(`f1`, `f2`, `f3`)
|
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|
[{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5},{"cbe":{"20":1,"10":1},"notnull":2,"null":3,"all":5},{"cbe":{"广州":1,"上海":2,"北京":2},"notnull":5,"null":0,"all":5}]
|
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+```
+
+### keywords
+
+COUNT_BY_ENUM
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
index 44f92bdd63..0e361afb79 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
@@ -27,6 +27,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -85,6 +86,7 @@ public class BuiltinAggregateFunctions implements
FunctionHelper {
agg(CollectList.class, "collect_list"),
agg(CollectSet.class, "collect_set"),
agg(Count.class, "count"),
+ agg(CountByEnum.class, "count_by_enum"),
agg(GroupBitAnd.class, "group_bit_and"),
agg(GroupBitOr.class, "group_bit_or"),
agg(GroupBitXor.class, "group_bit_xor"),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 869b59cd86..616733918a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -203,6 +203,7 @@ public class FunctionSet<T> {
public static final String HISTOGRAM = "histogram";
public static final String HIST = "hist";
public static final String MAP_AGG = "map_agg";
+ public static final String COUNT_BY_ENUM = "count_by_enum";
private static final Map<Type, String> TOPN_UPDATE_SYMBOL =
ImmutableMap.<Type, String>builder()
@@ -1613,6 +1614,21 @@ public class FunctionSet<T> {
"lead", Lists.newArrayList(t, Type.BIGINT), t, t,
true));
}
+ // count_by_enum
+ addBuiltin(AggregateFunction.createBuiltin(COUNT_BY_ENUM,
+ Lists.newArrayList(Type.STRING),
+ Type.STRING,
+ Type.STRING,
+ true,
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ false, true, false, true));
+
}
public Map<String, List<Function>> getVectorizedFunctions() {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
new file mode 100644
index 0000000000..8232e9e403
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Nereids expression for the count_by_enum(expr1, ..., exprN) aggregate function.
+ *
+ * <p>The function takes one or more arguments and is never nullable (AlwaysNotNullable).
+ */
+public class CountByEnum extends AggregateFunction implements ExplicitlyCastableSignature, AlwaysNotNullable {
+
+    // NOTE(review): this declares an ARRAY<STRING> return type and a single STRING argument,
+    // while the legacy FunctionSet registration uses a STRING return type and the documented
+    // syntax is variadic. Presumably this should be ret(StringType.INSTANCE).varArgs(StringType.INSTANCE);
+    // confirm before changing, since that also makes the ArrayType import unused.
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(ArrayType.of(StringType.INSTANCE)).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 or more arguments.
+     */
+    public CountByEnum(Expression arg, Expression... varArgs) {
+        super("count_by_enum", ExpressionUtils.mergeArguments(arg, varArgs));
+    }
+
+    /**
+     * constructor with distinct flag and 1 or more arguments.
+     */
+    public CountByEnum(boolean distinct, Expression arg, Expression... varArgs) {
+        super("count_by_enum", distinct, ExpressionUtils.mergeArguments(arg, varArgs));
+    }
+
+    /**
+     * Rebuilds this function with a new distinct flag and new children.
+     *
+     * @param distinct whether the rebuilt aggregate is DISTINCT
+     * @param children the new arguments; at least one is required
+     * @return a new CountByEnum over all of the given children
+     */
+    @Override
+    public AggregateFunction withDistinctAndChildren(boolean distinct, List<Expression> children) {
+        // The function is variadic, so every child must be carried over, and the result
+        // must stay a CountByEnum rather than turning into a different aggregate.
+        Preconditions.checkArgument(children.size() >= 1);
+        return new CountByEnum(distinct, children.get(0),
+                children.subList(1, children.size()).toArray(new Expression[0]));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitCountByEnum(this, context);
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
index f828a486ff..280b8c47ab 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
@@ -28,6 +28,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -118,6 +119,10 @@ public interface AggregateFunctionVisitor<R, C> {
return visitAggregateFunction(count, context);
}
+    /** Visits a count_by_enum aggregate; by default delegates to visitAggregateFunction. */
+    default R visitCountByEnum(CountByEnum countByEnum, C context) {
+        return visitAggregateFunction(countByEnum, context);
+    }
+
default R visitMultiDistinctCount(MultiDistinctCount multiDistinctCount, C
context) {
return visitAggregateFunction(multiDistinctCount, context);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
index 18b45c6bf3..fd58f6e833 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
@@ -527,4 +527,46 @@ public class AggregateTest extends TestWithFeService {
}
} while (false);
}
+
+ @Test
+ public void testCountByEnumAnalysisException() throws Exception {
+ ConnectContext ctx = UtFrameUtils.createDefaultCtx();
+
+ // normal.
+ do {
+ String query = "select count_by_enum(name) from "
+ + DB_NAME + "." + TABLE_NAME;
+ try {
+ UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+ } catch (Exception e) {
+ Assert.fail("must be AnalysisException.");
+ }
+ } while (false);
+
+ do {
+ String query = "select count_by_enum(name, commission) from "
+ + DB_NAME + "." + TABLE_NAME;
+ try {
+ UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+ } catch (Exception e) {
+ Assert.fail("must be AnalysisException.");
+ }
+ } while (false);
+
+ // less argument.
+ do {
+ String query = "select count_by_enum() from "
+ + DB_NAME + "." + TABLE_NAME;
+ try {
+ UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+ } catch (AnalysisException e) {
+ Assert.assertTrue(e.getMessage().contains("No matching
function with signature: count_by_enum()"));
+ break;
+ } catch (Exception e) {
+ Assert.fail("must be AnalysisException.");
+ }
+ Assert.fail("must be AnalysisException.");
+ } while (false);
+
+ }
}
diff --git
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
new file mode 100644
index 0000000000..cd4fe3d463
--- /dev/null
+++
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
@@ -0,0 +1,76 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
diff --git
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
new file mode 100644
index 0000000000..d111f4ada4
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases are copied from https://github.com/trinodb/trino/tree/master
+// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("test_aggregate_count_by_enum") {
+    sql "set enable_vectorized_engine = true"
+
+    def tableName = "count_by_enum_test"
+    sql "DROP TABLE IF EXISTS ${tableName}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName} (
+            `id` varchar(1024) NULL,
+            `f1` text REPLACE_IF_NOT_NULL NULL,
+            `f2` text REPLACE_IF_NOT_NULL NULL,
+            `f3` text REPLACE_IF_NOT_NULL NULL
+        )
+        AGGREGATE KEY(`id`)
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+
+    sql "INSERT INTO ${tableName} values(1, \"F\", \"10\", \"China\"),(2, \"F\", \"20\", \"China\"),(3, \"M\", NULL, \"United States\"),(4, \"M\", NULL, \"United States\"),(5, \"M\", NULL, \"England\");"
+
+    // Each probe is [argument list passed to count_by_enum, index into the returned JSON array].
+    def probes = [["f1", 0], ["f2", 0], ["f1,f2,f3", 0], ["f1,f2,f3", 1], ["f1,f2,f3", 2]]
+    // JSON fields read from the selected array element, in the order the expected output lists them.
+    def probedFields = ["cbe.F", "cbe.M", "notnull", "null", "all"]
+
+    for (def probe : probes) {
+        for (def field : probedFields) {
+            qt_select "select get_json_string(count_by_enum(${probe[0]}), '\$.[${probe[1]}].${field}') from ${tableName}"
+        }
+    }
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]