[doris] branch master updated: [Feature](count_by_enum) support count_by_enum function (#22071)

yiguolei Sun, 06 Aug 2023 01:05:30 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 1a8a1e5b16 [Feature](count_by_enum) support count_by_enum function 
(#22071)
1a8a1e5b16 is described below

commit 1a8a1e5b16acf10d7763201a42a6ba813ee9a2a9
Author: czzmmc <[email protected]>
AuthorDate: Sun Aug 6 16:05:14 2023 +0800

    [Feature](count_by_enum) support count_by_enum function (#22071)
    
    count_by_enum(expr1, expr2, ... , exprN);
    
    Treats the data in a column as an enumeration and counts the number of 
values in each enumeration. Returns the number of enumerated values for each 
column, and the number of non-null values versus the number of null values.
---
 .../aggregate_function_count_by_enum.cpp           |  61 +++++
 .../aggregate_function_count_by_enum.h             | 205 ++++++++++++++
 .../aggregate_function_simple_factory.cpp          |   2 +
 be/src/vec/utils/count_by_enum_helpers.hpp         |  67 +++++
 .../aggregate_functions/vec_count_by_enum_test.cpp | 298 +++++++++++++++++++++
 .../aggregate-functions/count_by_enum.md           | 152 +++++++++++
 docs/sidebars.json                                 |   3 +-
 .../aggregate-functions/count_by_enum.md           | 151 +++++++++++
 .../doris/catalog/BuiltinAggregateFunctions.java   |   2 +
 .../java/org/apache/doris/catalog/FunctionSet.java |  16 ++
 .../expressions/functions/agg/CountByEnum.java     |  63 +++++
 .../visitor/AggregateFunctionVisitor.java          |   5 +
 .../org/apache/doris/analysis/AggregateTest.java   |  42 +++
 .../test_aggregate_count_by_enum.out               |  76 ++++++
 .../test_aggregate_count_by_enum.groovy            |  74 +++++
 15 files changed, 1216 insertions(+), 1 deletion(-)

diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp
new file mode 100644
index 0000000000..1a0bf25182
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.cpp
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_count_by_enum.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/// Factory callback that builds the `count_by_enum` aggregate function.
+///
+/// Every argument must be a string type, optionally wrapped in Nullable, because
+/// AggregateFunctionCountByEnum::add() casts each input column to ColumnString.
+///
+/// @param name               function name, used in the warning messages
+/// @param argument_types     types of the call arguments; at least one is required
+/// @param result_is_nullable not read by this function
+/// @return the aggregate function, or nullptr (after logging a warning) when the
+///         argument list is empty or contains a non-string type
+AggregateFunctionPtr create_aggregate_function_count_by_enum(const std::string& name,
+                                                             const DataTypes& argument_types,
+                                                             const bool result_is_nullable) {
+    if (argument_types.empty()) {
+        LOG(WARNING) << fmt::format("Illegal number {} of argument for aggregate function {}",
+                                    argument_types.size(), name);
+        return nullptr;
+    }
+
+    // Check every argument, not only the first one: add() treats all of them alike.
+    for (const auto& argument_type : argument_types) {
+        auto type = argument_type.get();
+        if (type->is_nullable()) {
+            type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get();
+        }
+
+        WhichDataType which(*type);
+        if (!which.is_string()) {
+            LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}",
+                                        argument_type->get_name(), name);
+            return nullptr;
+        }
+    }
+
+    return std::make_shared<AggregateFunctionCountByEnum<AggregateFunctionCountByEnumData>>(
+            argument_types);
+}
+
+/// Registers the `count_by_enum` creator with the factory twice: first with the
+/// trailing flag set to true, then with it set to false.
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& factory) {
+    const std::string function_name = "count_by_enum";
+    factory.register_function(function_name, create_aggregate_function_count_by_enum, true);
+    factory.register_function(function_name, create_aggregate_function_count_by_enum, false);
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h 
b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h
new file mode 100644
index 0000000000..273fa2a1e4
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count_by_enum.h
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+#include "vec/utils/count_by_enum_helpers.hpp"
+
+namespace doris::vectorized {
+
+/// Aggregation state of count_by_enum: one CountByEnumData per argument column,
+/// stored at the index that is passed to add().
+struct AggregateFunctionCountByEnumData {
+    using MapType = std::unordered_map<std::string, uint64_t>;
+
+    /// Drops the statistics of every column.
+    void reset() { data_vec.clear(); }
+
+    /// Records `number` occurrences of the non-null `value` for column `idx`.
+    void add(int idx, const StringRef& value, const UInt64& number = 1) {
+        CountByEnumData& data = slot(idx);
+        // operator[] value-initializes a missing counter to 0, so one lookup covers
+        // both the first and every repeated occurrence of a key.
+        data.cbe[value.to_string()] += number;
+        data.not_null += number;
+        data.all += number;
+    }
+
+    /// Records `number` NULL rows for column `idx`.
+    void add(int idx, const UInt64& number = 1) {
+        CountByEnumData& data = slot(idx);
+        data.null += number;
+        data.all += number;
+    }
+
+    /// Adds the per-column counters of `rhs` to this state, column by column.
+    void merge(const AggregateFunctionCountByEnumData& rhs) {
+        // Make room for columns that only `rhs` has seen so far; resize()
+        // value-initializes the new entries, so their counters start at zero.
+        if (data_vec.size() < rhs.data_vec.size()) {
+            data_vec.resize(rhs.data_vec.size());
+        }
+
+        for (size_t idx = 0; idx < rhs.data_vec.size(); ++idx) {
+            const CountByEnumData& rhs_data = rhs.data_vec[idx];
+            CountByEnumData& data = data_vec[idx];
+
+            // Iterate by reference: copying each entry would copy its key string.
+            for (const auto& [key, count] : rhs_data.cbe) {
+                data.cbe[key] += count;
+            }
+
+            data.not_null += rhs_data.not_null;
+            data.null += rhs_data.null;
+            data.all += rhs_data.all;
+        }
+    }
+
+    /// Serializes the state: the column count, then for every column the map size,
+    /// each (count, key) pair, and the not_null / null / all counters.
+    void write(BufferWritable& buf) const {
+        write_binary(data_vec.size(), buf);
+
+        for (const auto& data : data_vec) {
+            const MapType& unordered_map = data.cbe;
+            write_binary(unordered_map.size(), buf);
+
+            for (const auto& [key, value] : unordered_map) {
+                write_binary(value, buf);
+                write_binary(key, buf);
+            }
+
+            write_binary(data.not_null, buf);
+            write_binary(data.null, buf);
+            write_binary(data.all, buf);
+        }
+    }
+
+    /// Replaces the state with one read from `buf`, in the layout written by write().
+    void read(BufferReadable& buf) {
+        data_vec.clear();
+
+        uint64_t vec_size_number = 0;
+        read_binary(vec_size_number, buf);
+
+        // The loop counters share the type of the sizes they are compared against.
+        for (uint64_t idx = 0; idx < vec_size_number; ++idx) {
+            uint64_t element_number = 0;
+            read_binary(element_number, buf);
+
+            CountByEnumData data {};
+            data.cbe.reserve(element_number);
+            for (uint64_t i = 0; i < element_number; ++i) {
+                std::string key;
+                uint64_t value = 0;
+                read_binary(value, buf);
+                read_binary(key, buf);
+                data.cbe.emplace(std::move(key), value);
+            }
+
+            read_binary(data.not_null, buf);
+            read_binary(data.null, buf);
+            read_binary(data.all, buf);
+            data_vec.emplace_back(std::move(data));
+        }
+    }
+
+    /// Returns the JSON text that build_json_from_vec() produces for this state.
+    std::string get() const {
+        rapidjson::StringBuffer buffer;
+        build_json_from_vec(buffer, data_vec);
+        return std::string(buffer.GetString(), buffer.GetSize());
+    }
+
+private:
+    /// Returns the entry of column `idx`, growing the vector up to that index so
+    /// that an index beyond the current size stays in bounds.
+    CountByEnumData& slot(int idx) {
+        const auto pos = static_cast<size_t>(idx);
+        if (data_vec.size() <= pos) {
+            data_vec.resize(pos + 1);
+        }
+        return data_vec[pos];
+    }
+
+    std::vector<CountByEnumData> data_vec;
+};
+
+/// Aggregate function `count_by_enum`: feeds every argument column into `Data`
+/// and returns the text produced by `Data::get()` as a string value.
+template <typename Data>
+class AggregateFunctionCountByEnum final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionCountByEnum<Data>> {
+public:
+    AggregateFunctionCountByEnum() = default;
+    AggregateFunctionCountByEnum(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionCountByEnum<Data>>(
+                      argument_types_),
+              arg_count(argument_types_.size()) {}
+
+    std::string get_name() const override { return "count_by_enum"; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeString>(); }
+
+    /// Adds row `row_num` of each of the `arg_count` columns to the state at `place`.
+    /// A column that is not a ColumnNullable is read as ColumnString directly; a
+    /// nullable column contributes either a NULL count or its nested string value.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        for (size_t i = 0; i < arg_count; ++i) {
+            // Data::add() takes the column position as int.
+            const int idx = static_cast<int>(i);
+            const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[i]);
+            if (nullable_column == nullptr) {
+                this->data(place).add(
+                        idx, static_cast<const ColumnString&>(*columns[i]).get_data_at(row_num));
+            } else if (nullable_column->is_null_at(row_num)) {
+                // TODO create a null vector
+                this->data(place).add(idx);
+            } else {
+                this->data(place).add(
+                        idx, static_cast<const ColumnString&>(nullable_column->get_nested_column())
+                                     .get_data_at(row_num));
+            }
+        }
+    }
+
+    /// Clears the state at `place`.
+    void reset(AggregateDataPtr place) const override { this->data(place).reset(); }
+
+    /// Merges the state at `rhs` into the state at `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena* arena) const override {
+        this->data(place).merge(this->data(rhs));
+    }
+
+    /// Writes the state at `place` to `buf`.
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    /// Replaces the state at `place` with the one read from `buf`.
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    /// Appends the text returned by `Data::get()` to the string column `to`.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const std::string json_arr = this->data(place).get();
+        assert_cast<ColumnString&>(to).insert_data(json_arr.c_str(), json_arr.length());
+    }
+
+private:
+    // Number of argument columns; zero unless set by the DataTypes constructor.
+    size_t arg_count = 0;
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git 
a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
index 9ff47a6f57..f541f99b0a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -35,6 +35,7 @@ void 
register_aggregate_function_min_by(AggregateFunctionSimpleFactory& factory)
 void register_aggregate_function_max_by(AggregateFunctionSimpleFactory& 
factory);
 void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory);
 void register_aggregate_function_count(AggregateFunctionSimpleFactory& 
factory);
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& 
factory);
 void register_aggregate_function_HLL_union_agg(AggregateFunctionSimpleFactory& 
factory);
 void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory);
 void register_aggregate_function_bit(AggregateFunctionSimpleFactory& factory);
@@ -71,6 +72,7 @@ AggregateFunctionSimpleFactory& 
AggregateFunctionSimpleFactory::instance() {
         register_aggregate_function_max_by(instance);
         register_aggregate_function_avg(instance);
         register_aggregate_function_count(instance);
+        register_aggregate_function_count_by_enum(instance);
         register_aggregate_function_count_old(instance);
         register_aggregate_function_sum_old(instance);
         register_aggregate_function_uniq(instance);
diff --git a/be/src/vec/utils/count_by_enum_helpers.hpp 
b/be/src/vec/utils/count_by_enum_helpers.hpp
new file mode 100644
index 0000000000..20c38b765b
--- /dev/null
+++ b/be/src/vec/utils/count_by_enum_helpers.hpp
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <rapidjson/document.h>
+#include <rapidjson/prettywriter.h>
+#include <rapidjson/stringbuffer.h>
+
+#include <boost/dynamic_bitset.hpp>
+
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Statistics that count_by_enum keeps for a single argument column.
+struct CountByEnumData {
+    /// Occurrence count of each distinct non-null value.
+    std::unordered_map<std::string, uint64_t> cbe;
+    // The counters get default initializers so that a default-initialized
+    // instance never holds indeterminate values.
+    /// Number of non-null rows.
+    uint64_t not_null = 0;
+    /// Number of NULL rows.
+    uint64_t null = 0;
+    /// Number of rows overall, NULL or not.
+    uint64_t all = 0;
+};
+
+/// Writes `data_vec` into `buffer` as a JSON array holding one object per vector
+/// element, in vector order. Each object has the members "cbe" (an object mapping
+/// value to count), "notnull", "null" and "all".
+///
+/// Declared inline because the definition lives in a header: without it, every
+/// translation unit including this file would emit its own external definition.
+inline void build_json_from_vec(rapidjson::StringBuffer& buffer,
+                                const std::vector<CountByEnumData>& data_vec) {
+    rapidjson::Document doc;
+    doc.SetArray();
+    rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
+
+    // Iterate by reference; the map of a column is never copied.
+    for (const CountByEnumData& column : data_vec) {
+        rapidjson::Value obj_cbe(rapidjson::kObjectType);
+        for (const auto& [key, count] : column.cbe) {
+            // Pass the length explicitly so a key containing '\0' is not truncated.
+            rapidjson::Value key_cbe(key.data(), static_cast<rapidjson::SizeType>(key.size()),
+                                     allocator);
+            rapidjson::Value value_cbe(count);
+            obj_cbe.AddMember(key_cbe, value_cbe, allocator);
+        }
+
+        rapidjson::Value obj(rapidjson::kObjectType);
+        obj.AddMember("cbe", obj_cbe, allocator);
+        obj.AddMember("notnull", column.not_null, allocator);
+        obj.AddMember("null", column.null, allocator);
+        obj.AddMember("all", column.all, allocator);
+
+        doc.PushBack(obj, allocator);
+    }
+
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    doc.Accept(writer);
+}
+
+} // namespace  doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp 
b/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp
new file mode 100644
index 0000000000..fa953b5101
--- /dev/null
+++ b/be/test/vec/aggregate_functions/vec_count_by_enum_test.cpp
@@ -0,0 +1,298 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <rapidjson/document.h>
+
+#include "common/logging.h"
+#include "gtest/gtest.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+
+namespace doris::vectorized {
+
+void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& 
factory);
+
+/// Fixture that looks up `count_by_enum` over a single Nullable(String) argument
+/// in the aggregate function factory before each test.
+class VCountByEnumTest : public testing::Test {
+public:
+    AggregateFunctionPtr agg_function;
+
+    VCountByEnumTest() = default;
+
+    void SetUp() override {
+        // Bind to the object returned by instance() instead of copying it.
+        AggregateFunctionSimpleFactory& factory = AggregateFunctionSimpleFactory::instance();
+        DataTypes data_types = {
+                std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+        };
+        agg_function = factory.get("count_by_enum", data_types, true);
+        // Every test dereferences agg_function, so stop here when the lookup fails.
+        ASSERT_NE(agg_function, nullptr);
+    }
+
+    void TearDown() override {}
+};
+
+// A state without any rows goes through serialize/deserialize and a merge with
+// another empty state; both states must then render as an empty JSON array.
+TEST_F(VCountByEnumTest, testEmpty) {
+    std::unique_ptr<char[]> first_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr first_state = first_storage.get();
+    agg_function->create(first_state);
+
+    // Round-trip the empty state through its serialized form.
+    ColumnString serialized;
+    VectorBufferWriter writer(serialized);
+    agg_function->serialize(first_state, writer);
+    writer.commit();
+    LOG(INFO) << "buf size : " << serialized.size();
+    VectorBufferReader reader(serialized.get_data_at(0));
+    agg_function->deserialize(first_state, reader, nullptr);
+
+    std::unique_ptr<char[]> second_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr second_state = second_storage.get();
+    agg_function->create(second_state);
+
+    agg_function->merge(first_state, second_state, nullptr);
+
+    DataTypePtr first_type = std::make_shared<DataTypeString>();
+    auto first_column = first_type->create_column();
+    agg_function->insert_result_into(first_state, *first_column);
+    auto& first_result = assert_cast<ColumnString&>(*first_column);
+    LOG(INFO) << "result : " << first_result.get_data_at(0);
+    EXPECT_EQ(first_result.get_data_at(0).to_string(), "[]");
+
+    DataTypePtr second_type = std::make_shared<DataTypeString>();
+    auto second_column = second_type->create_column();
+    agg_function->insert_result_into(second_state, *second_column);
+    auto& second_result = assert_cast<ColumnString&>(*second_column);
+    LOG(INFO) << "result2 : " << second_result.get_data_at(0);
+    EXPECT_EQ(second_result.get_data_at(0).to_string(), "[]");
+
+    agg_function->destroy(first_state);
+    agg_function->destroy(second_state);
+}
+
+// Five rows of a plain string column are added to one state and merged into a
+// fresh one; the merged result must report F=3, M=2 and no NULL rows.
+TEST_F(VCountByEnumTest, testNotNullableSample) {
+    auto genders = ColumnString::create();
+    genders->insert("F");
+    genders->insert("F");
+    genders->insert("M");
+    genders->insert("F");
+    genders->insert("M");
+    const int row_count = 5;
+
+    std::unique_ptr<char[]> source_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr source_state = source_storage.get();
+    agg_function->create(source_state);
+    const IColumn* columns[1] = {genders.get()};
+    for (int row = 0; row < row_count; ++row) {
+        agg_function->add(source_state, columns, row, nullptr);
+    }
+
+    std::unique_ptr<char[]> target_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr target_state = target_storage.get();
+    agg_function->create(target_state);
+
+    agg_function->merge(target_state, source_state, nullptr);
+
+    DataTypePtr string_type = std::make_shared<DataTypeString>();
+    auto result_column = string_type->create_column();
+    agg_function->insert_result_into(target_state, *result_column);
+    auto& merged = assert_cast<ColumnString&>(*result_column);
+
+    rapidjson::Document document;
+    document.Parse(merged.get_data_at(0).to_string().c_str());
+    const rapidjson::Value& first_entry = document[0];
+    EXPECT_EQ(first_entry["cbe"]["M"].GetInt(), 2);
+    EXPECT_EQ(first_entry["cbe"]["F"].GetInt(), 3);
+    EXPECT_EQ(first_entry["notnull"].GetInt(), 5);
+    EXPECT_EQ(first_entry["null"].GetInt(), 0);
+    EXPECT_EQ(first_entry["all"].GetInt(), 5);
+
+    agg_function->destroy(source_state);
+    agg_function->destroy(target_state);
+}
+
+// Five rows of a nullable string column (three values, two NULLs) are added to
+// one state and merged into a fresh one; the merged result must report F=2, M=1,
+// three non-null rows and two NULL rows.
+TEST_F(VCountByEnumTest, testNullableSample) {
+    auto values = ColumnString::create();
+    values->insert("F");
+    values->insert("F");
+    values->insert("M");
+    ColumnPtr nested_column = std::move(values);
+
+    // 1 marks a NULL row.
+    auto null_flags = ColumnVector<uint8_t>::create();
+    std::vector<uint8_t> flags = {0, 0, 0, 1, 1};
+    for (const uint8_t flag : flags) {
+        null_flags->insert(flag);
+    }
+
+    auto nullable_values = ColumnNullable::create(nested_column, std::move(null_flags));
+    const int row_count = 5;
+
+    std::unique_ptr<char[]> source_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr source_state = source_storage.get();
+    agg_function->create(source_state);
+    const IColumn* columns[1] = {nullable_values.get()};
+    for (int row = 0; row < row_count; ++row) {
+        agg_function->add(source_state, columns, row, nullptr);
+    }
+
+    std::unique_ptr<char[]> target_storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr target_state = target_storage.get();
+    agg_function->create(target_state);
+
+    agg_function->merge(target_state, source_state, nullptr);
+
+    DataTypePtr string_type = std::make_shared<DataTypeString>();
+    auto result_column = string_type->create_column();
+    agg_function->insert_result_into(target_state, *result_column);
+    auto& merged = assert_cast<ColumnString&>(*result_column);
+
+    rapidjson::Document document;
+    document.Parse(merged.get_data_at(0).to_string().c_str());
+    const rapidjson::Value& first_entry = document[0];
+    EXPECT_EQ(first_entry["cbe"]["M"].GetInt(), 1);
+    EXPECT_EQ(first_entry["cbe"]["F"].GetInt(), 2);
+    EXPECT_EQ(first_entry["notnull"].GetInt(), 3);
+    EXPECT_EQ(first_entry["null"].GetInt(), 2);
+    EXPECT_EQ(first_entry["all"].GetInt(), 5);
+
+    agg_function->destroy(source_state);
+    agg_function->destroy(target_state);
+}
+
+// Same nullable input as the merge tests, but the result is read straight from
+// the state the rows were added to.
+TEST_F(VCountByEnumTest, testNoMerge) {
+    auto values = ColumnString::create();
+    values->insert("F");
+    values->insert("F");
+    values->insert("M");
+    ColumnPtr nested_column = std::move(values);
+
+    // 1 marks a NULL row.
+    auto null_flags = ColumnVector<uint8_t>::create();
+    std::vector<uint8_t> flags = {0, 0, 0, 1, 1};
+    for (const uint8_t flag : flags) {
+        null_flags->insert(flag);
+    }
+
+    auto nullable_values = ColumnNullable::create(nested_column, std::move(null_flags));
+    const int row_count = 5;
+
+    std::unique_ptr<char[]> storage(new char[agg_function->size_of_data()]);
+    AggregateDataPtr state = storage.get();
+    agg_function->create(state);
+    const IColumn* columns[1] = {nullable_values.get()};
+    for (int row = 0; row < row_count; ++row) {
+        agg_function->add(state, columns, row, nullptr);
+    }
+
+    DataTypePtr string_type = std::make_shared<DataTypeString>();
+    auto result_column = string_type->create_column();
+    agg_function->insert_result_into(state, *result_column);
+    auto& rendered = assert_cast<ColumnString&>(*result_column);
+
+    rapidjson::Document document;
+    document.Parse(rendered.get_data_at(0).to_string().c_str());
+    const rapidjson::Value& first_entry = document[0];
+    EXPECT_EQ(first_entry["cbe"]["M"].GetInt(), 1);
+    EXPECT_EQ(first_entry["cbe"]["F"].GetInt(), 2);
+    EXPECT_EQ(first_entry["notnull"].GetInt(), 3);
+    EXPECT_EQ(first_entry["null"].GetInt(), 2);
+    EXPECT_EQ(first_entry["all"].GetInt(), 5);
+
+    agg_function->destroy(state);
+}
+
+// A populated state is serialized, destroyed and deserialized into a new state,
+// which must report the original counts. Merging a second, identically filled
+// state into it must then double every counter.
+TEST_F(VCountByEnumTest, testSerialize) {
+    const int batch_size = 5;
+    auto column_f1 = ColumnString::create();
+    column_f1->insert("F");
+    column_f1->insert("F");
+    column_f1->insert("M");
+    ColumnPtr column_f1_ptr = std::move(column_f1);
+    auto null_map = ColumnVector<uint8_t>::create();
+    std::vector<uint8_t> offs = {0, 0, 0, 1, 1};
+    for (const uint8_t flag : offs) {
+        null_map->insert(flag);
+    }
+    auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr, std::move(null_map));
+
+    std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place = memory.get();
+    agg_function->create(place);
+    const IColumn* column[1] = {nullable_column_f1.get()};
+    for (int i = 0; i < batch_size; i++) {
+        agg_function->add(place, column, i, nullptr);
+    }
+
+    // Serialize, then destroy the source state so only the buffer carries the data.
+    ColumnString buf;
+    VectorBufferWriter buf_writer(buf);
+    agg_function->serialize(place, buf_writer);
+    buf_writer.commit();
+    agg_function->destroy(place);
+
+    std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place2 = memory2.get();
+    agg_function->create(place2);
+
+    VectorBufferReader buf_reader(buf.get_data_at(0));
+    agg_function->deserialize(place2, buf_reader, nullptr);
+
+    DataTypePtr result_type1 = std::make_shared<DataTypeString>();
+    auto column_result1 = result_type1->create_column();
+    agg_function->insert_result_into(place2, *column_result1);
+    auto& result1 = assert_cast<ColumnString&>(*column_result1);
+
+    rapidjson::Document document;
+    document.Parse(result1.get_data_at(0).to_string().c_str());
+    const rapidjson::Value& item0 = document[0];
+    EXPECT_EQ(item0["cbe"]["M"].GetInt(), 1);
+    EXPECT_EQ(item0["cbe"]["F"].GetInt(), 2);
+    EXPECT_EQ(item0["notnull"].GetInt(), 3);
+    EXPECT_EQ(item0["null"].GetInt(), 2);
+    EXPECT_EQ(item0["all"].GetInt(), 5);
+
+    auto column_f1_2 = ColumnString::create();
+    column_f1_2->insert("F");
+    column_f1_2->insert("F");
+    column_f1_2->insert("M");
+    ColumnPtr column_f1_2_ptr = std::move(column_f1_2);
+    auto null_map_2 = ColumnVector<uint8_t>::create();
+    std::vector<uint8_t> offs_2 = {0, 0, 0, 1, 1};
+    // Iterate over offs_2 itself; the loop used to be bounded by offs.size().
+    for (const uint8_t flag : offs_2) {
+        null_map_2->insert(flag);
+    }
+    auto nullable_column_f1_2 = ColumnNullable::create(column_f1_2_ptr, std::move(null_map_2));
+
+    std::unique_ptr<char[]> memory3(new char[agg_function->size_of_data()]);
+    AggregateDataPtr place3 = memory3.get();
+    agg_function->create(place3);
+    const IColumn* column2[1] = {nullable_column_f1_2.get()};
+    for (int i = 0; i < batch_size; i++) {
+        agg_function->add(place3, column2, i, nullptr);
+    }
+
+    agg_function->merge(place2, place3, nullptr);
+
+    DataTypePtr result_type2 = std::make_shared<DataTypeString>();
+    auto column_result2 = result_type2->create_column();
+    agg_function->insert_result_into(place2, *column_result2);
+    auto& result2 = assert_cast<ColumnString&>(*column_result2);
+
+    rapidjson::Document document2;
+    document2.Parse(result2.get_data_at(0).to_string().c_str());
+    const rapidjson::Value& item0_2 = document2[0];
+    EXPECT_EQ(item0_2["cbe"]["M"].GetInt(), 2);
+    EXPECT_EQ(item0_2["cbe"]["F"].GetInt(), 4);
+    EXPECT_EQ(item0_2["notnull"].GetInt(), 6);
+    EXPECT_EQ(item0_2["null"].GetInt(), 4);
+    EXPECT_EQ(item0_2["all"].GetInt(), 10);
+
+    agg_function->destroy(place2);
+    agg_function->destroy(place3);
+}
+} // namespace doris::vectorized
diff --git 
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md 
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
new file mode 100644
index 0000000000..379661d0cf
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
@@ -0,0 +1,152 @@
+---
+{
+    "title": "COUNT_BY_ENUM",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## COUNT_BY_ENUM 
+
+<version since="1.2.0">
+
+COUNT_BY_ENUM
+
+</version>
+
+### Description
+#### Syntax
+
+`count_by_enum(expr1, expr2, ... , exprN);`
+
+Treats the values of each column as an enumeration and counts how many times each distinct value occurs. For each column, returns the count of every enumerated value together with the number of non-null values and the number of null values.
+
+#### Arguments
+
+`expr1` — At least one input must be specified. The value is a column of type 
`STRING`.
+
+#### Returned value
+
+Returns a JSON array string.
+
+For example:
+```json
+[{
+  "cbe": {
+    "F": 100,
+    "M": 99
+  },
+  "notnull": 199,
+  "null": 1,
+  "all": 200
+}, {
+  "cbe": {
+    "20": 10,
+    "30": 5,
+    "35": 1
+  },
+  "notnull": 16,
+  "null": 184,
+  "all": 200
+}, {
+  "cbe": {
+    "China": 10,
+    "United States": 9,
+    "England": 20,
+    "Germany": 30
+  },
+  "notnull": 69,
+  "null": 131,
+  "all": 200
+}]
+```
+Description: The return value is a JSON array string and the order of the 
internal objects is the order of the input parameters.
+* cbe: number of occurrences of each distinct non-null value.
+* notnull: number of non-null values.
+* null: number of null values.
+* all: total number of values, including both null and non-null values.
+
+
+### example
+
+```sql
+DROP TABLE IF EXISTS count_by_enum_test;
+
+CREATE TABLE count_by_enum_test(
+                                   `id` varchar(1024) NULL,
+                                   `f1` text REPLACE_IF_NOT_NULL NULL,
+                                   `f2` text REPLACE_IF_NOT_NULL NULL,
+                                   `f3` text REPLACE_IF_NOT_NULL NULL
+)
+AGGREGATE KEY(`id`)
+DISTRIBUTED BY HASH(id) BUCKETS 3 
+PROPERTIES ( 
+    "replication_num" = "1"
+);
+
+INSERT into count_by_enum_test (id, f1, f2, f3) values
+                                                    (1, "F", "10", "China"),
+                                                    (2, "F", "20", "China"),
+                                                    (3, "M", NULL, "United 
States"),
+                                                    (4, "M", NULL, "United 
States"),
+                                                    (5, "M", NULL, "England");
+
+SELECT * from count_by_enum_test;
+
++------+------+------+---------------+
+| id   | f1   | f2   | f3            |
++------+------+------+---------------+
+| 1    | F    | 10   | China         |
+| 2    | F    | 20   | China         |
+| 3    | M    | NULL | United States |
+| 4    | M    | NULL | United States |
+| 5    | M    | NULL | England       |
++------+------+------+---------------+
+
+select count_by_enum(f1) from count_by_enum_test;
+
++------------------------------------------------------+
+| count_by_enum(`f1`)                                  |
++------------------------------------------------------+
+| [{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------+
+
+select count_by_enum(f2) from count_by_enum_test;
+
++--------------------------------------------------------+
+| count_by_enum(`f2`)                                    |
++--------------------------------------------------------+
+| [{"cbe":{"10":1,"20":1},"notnull":2,"null":3,"all":5}] |
++--------------------------------------------------------+
+
+select count_by_enum(f1,f2,f3) from count_by_enum_test;
+
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| count_by_enum(`f1`, `f2`, `f3`)                                              
                                                                                
                            |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| 
[{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5},{"cbe":{"20":1,"10":1},"notnull":2,"null":3,"all":5},{"cbe":{"England":1,"United
 States":2,"China":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+    
+```
+
+### keywords
+
+COUNT_BY_ENUM
diff --git a/docs/sidebars.json b/docs/sidebars.json
index d2de428a60..5318df7b86 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -539,7 +539,8 @@
                                 
"sql-manual/sql-functions/aggregate-functions/sequence_match",
                                 
"sql-manual/sql-functions/aggregate-functions/sequence_count",
                                 
"sql-manual/sql-functions/aggregate-functions/grouping",
-                                
"sql-manual/sql-functions/aggregate-functions/grouping_id"
+                                
"sql-manual/sql-functions/aggregate-functions/grouping_id",
+                                
"sql-manual/sql-functions/aggregate-functions/count_by_enum"
                             ]
                         },
                         {
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
new file mode 100644
index 0000000000..873c446345
--- /dev/null
+++ 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/count_by_enum.md
@@ -0,0 +1,151 @@
+---
+{
+    "title": "COUNT_BY_ENUM",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## COUNT_BY_ENUM
+
+<version since="1.2.0">
+
+COUNT_BY_ENUM
+
+</version>
+
+### description
+#### Syntax
+
+`count_by_enum(expr1, expr2, ... , exprN);`
+
+将列中数据看作枚举值，统计每个枚举值的个数。返回各个列枚举值的个数，以及非 null 值的个数与 null 值的个数。
+
+#### Arguments
+
+`expr1` — 至少填写一个输入。值为字符串（STRING）类型的列。
+
+#### Returned value
+
+返回一个 JSONArray 字符串。
+
+例如：
+```json
+[{
+       "cbe": {
+               "F": 100,
+               "M": 99
+       },
+       "notnull": 199,
+       "null": 1,
+       "all": 200
+}, {
+       "cbe": {
+               "20": 10,
+               "30": 5,
+               "35": 1
+       },
+       "notnull": 16,
+       "null": 184,
+       "all": 200
+}, {
+       "cbe": {
+               "北京": 10,
+               "上海": 9,
+               "广州": 20,
+               "深圳": 30
+       },
+       "notnull": 69,
+       "null": 131,
+       "all": 200
+}]
+```
+说明：返回值为一个 JSON array 字符串，内部对象的顺序是输入参数的顺序。
+* cbe：根据枚举值统计非 null 值的统计结果
+* notnull：非 null 的个数
+* null：null 值个数
+* all：总数，包括 null 值与非 null 值
+
+### example
+
+```sql
+DROP TABLE IF EXISTS count_by_enum_test;
+
+CREATE TABLE count_by_enum_test(
+                `id` varchar(1024) NULL,
+                `f1` text REPLACE_IF_NOT_NULL NULL,
+                `f2` text REPLACE_IF_NOT_NULL NULL,
+                `f3` text REPLACE_IF_NOT_NULL NULL
+                )
+AGGREGATE KEY(`id`)
+DISTRIBUTED BY HASH(id) BUCKETS 3 
+PROPERTIES ( 
+    "replication_num" = "1"
+); 
+
+INSERT into count_by_enum_test (id, f1, f2, f3) values
+                                        (1, "F", "10", "北京"),
+                                        (2, "F", "20", "北京"),
+                                        (3, "M", NULL, "上海"),
+                                        (4, "M", NULL, "上海"),
+                                        (5, "M", NULL, "广州");
+
+SELECT * from count_by_enum_test;
+
++------+------+------+--------+
+| id   | f1   | f2   | f3     |
++------+------+------+--------+
+| 2    | F    | 20   | 北京   |
+| 3    | M    | NULL | 上海   |
+| 4    | M    | NULL | 上海   |
+| 5    | M    | NULL | 广州   |
+| 1    | F    | 10   | 北京   |
++------+------+------+--------+
+
+select count_by_enum(f1) from count_by_enum_test;
+
++------------------------------------------------------+
+| count_by_enum(`f1`)                                  |
++------------------------------------------------------+
+| [{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5}] |
++------------------------------------------------------+
+
+select count_by_enum(f2) from count_by_enum_test;
+
++--------------------------------------------------------+
+| count_by_enum(`f2`)                                    |
++--------------------------------------------------------+
+| [{"cbe":{"10":1,"20":1},"notnull":2,"null":3,"all":5}] |
++--------------------------------------------------------+
+
+select count_by_enum(f1,f2,f3) from count_by_enum_test;
+
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| count_by_enum(`f1`, `f2`, `f3`)                                              
                                                                                
                     |
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| 
[{"cbe":{"M":3,"F":2},"notnull":5,"null":0,"all":5},{"cbe":{"20":1,"10":1},"notnull":2,"null":3,"all":5},{"cbe":{"广州":1,"上海":2,"北京":2},"notnull":5,"null":0,"all":5}]
       |
++-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+```
+
+### keywords
+
+COUNT_BY_ENUM
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
index 44f92bdd63..0e361afb79 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
@@ -27,6 +27,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
 import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
 import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
 import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -85,6 +86,7 @@ public class BuiltinAggregateFunctions implements 
FunctionHelper {
             agg(CollectList.class, "collect_list"),
             agg(CollectSet.class, "collect_set"),
             agg(Count.class, "count"),
+            agg(CountByEnum.class, "count_by_enum"),
             agg(GroupBitAnd.class, "group_bit_and"),
             agg(GroupBitOr.class, "group_bit_or"),
             agg(GroupBitXor.class, "group_bit_xor"),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 869b59cd86..616733918a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -203,6 +203,7 @@ public class FunctionSet<T> {
     public static final String HISTOGRAM = "histogram";
     public static final String HIST = "hist";
     public static final String MAP_AGG = "map_agg";
+    public static final String COUNT_BY_ENUM = "count_by_enum";
 
     private static final Map<Type, String> TOPN_UPDATE_SYMBOL =
             ImmutableMap.<Type, String>builder()
@@ -1613,6 +1614,21 @@ public class FunctionSet<T> {
                         "lead", Lists.newArrayList(t, Type.BIGINT), t, t, 
true));
         }
 
+        // count_by_enum
+        addBuiltin(AggregateFunction.createBuiltin(COUNT_BY_ENUM,
+                Lists.newArrayList(Type.STRING),
+                Type.STRING,
+                Type.STRING,
+                true,
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                "",
+                false, true, false, true));
+
     }
 
     public Map<String, List<Function>> getVectorizedFunctions() {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
new file mode 100644
index 0000000000..8232e9e403
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountByEnum.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/** count_by_enum agg function. */
+public class CountByEnum extends AggregateFunction implements 
ExplicitlyCastableSignature, AlwaysNotNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(ArrayType.of(StringType.INSTANCE)).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 or more arguments.
+     */
+    public CountByEnum(Expression arg, Expression... varArgs) {
+        super("count_by_enum", ExpressionUtils.mergeArguments(arg, varArgs));
+    }
+
+    @Override
+    public AggregateFunction withDistinctAndChildren(boolean distinct, 
List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new CollectList(distinct, children.get(0));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitCountByEnum(this, context);
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
index f828a486ff..280b8c47ab 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
@@ -28,6 +28,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionInt;
 import org.apache.doris.nereids.trees.expressions.functions.agg.CollectList;
 import org.apache.doris.nereids.trees.expressions.functions.agg.CollectSet;
 import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
 import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -118,6 +119,10 @@ public interface AggregateFunctionVisitor<R, C> {
         return visitAggregateFunction(count, context);
     }
 
+    /** Visits a {@link CountByEnum}; this default forwards to {@code visitAggregateFunction}. */
+    default R visitCountByEnum(CountByEnum count, C context) {
+        return visitAggregateFunction(count, context);
+    }
+
     default R visitMultiDistinctCount(MultiDistinctCount multiDistinctCount, C 
context) {
         return visitAggregateFunction(multiDistinctCount, context);
     }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
index 18b45c6bf3..fd58f6e833 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java
@@ -527,4 +527,46 @@ public class AggregateTest extends TestWithFeService {
             }
         } while (false);
     }
+
+    /**
+     * Checks analysis of count_by_enum: calls with one or two arguments must analyze
+     * without error, and a call with no arguments must raise an AnalysisException.
+     */
+    @Test
+    public void testCountByEnumAnalysisException() throws Exception {
+        ConnectContext ctx = UtFrameUtils.createDefaultCtx();
+
+        // normal: a single argument must analyze successfully.
+        do {
+            String query = "select count_by_enum(name) from "
+                    + DB_NAME + "." + TABLE_NAME;
+            try {
+                UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+            } catch (Exception e) {
+                Assert.fail("count_by_enum with one argument must not throw: " + e.getMessage());
+            }
+        } while (false);
+
+        // normal: several arguments must analyze successfully.
+        do {
+            String query = "select count_by_enum(name, commission) from "
+                    + DB_NAME + "." + TABLE_NAME;
+            try {
+                UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+            } catch (Exception e) {
+                Assert.fail("count_by_enum with two arguments must not throw: " + e.getMessage());
+            }
+        } while (false);
+
+        // less argument: no arguments must be rejected with an AnalysisException.
+        do {
+            String query = "select count_by_enum() from "
+                    + DB_NAME + "." + TABLE_NAME;
+            try {
+                UtFrameUtils.parseAndAnalyzeStmt(query, ctx);
+            } catch (AnalysisException e) {
+                Assert.assertTrue(e.getMessage().contains("No matching function with signature: count_by_enum()"));
+                break;
+            } catch (Exception e) {
+                Assert.fail("must be AnalysisException.");
+            }
+            // Reached only when analysis unexpectedly succeeded.
+            Assert.fail("must be AnalysisException.");
+        } while (false);
+
+    }
 }
diff --git 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
new file mode 100644
index 0000000000..cd4fe3d463
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.out
@@ -0,0 +1,76 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+2
+
+-- !select --
+3
+
+-- !select --
+5
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+5
+
+-- !select --
+0
+
+-- !select --
+5
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
new file mode 100644
index 0000000000..d111f4ada4
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_count_by_enum.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+// 
/testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+// Regression suite for count_by_enum: loads five rows into an aggregate-key table and reads
+// individual fields of the returned JSON with get_json_string.
+suite("test_aggregate_count_by_enum") {
+    sql "set enable_vectorized_engine = true"
+
+    def tableName = "count_by_enum_test"
+    sql "DROP TABLE IF EXISTS ${tableName}"
+    sql """
+           CREATE TABLE IF NOT EXISTS ${tableName} (
+               `id` varchar(1024) NULL,
+            `f1` text REPLACE_IF_NOT_NULL NULL,
+            `f2` text REPLACE_IF_NOT_NULL NULL,
+            `f3` text REPLACE_IF_NOT_NULL NULL
+           )
+           AGGREGATE KEY(`id`)
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+           PROPERTIES (
+             "replication_num" = "1"
+           )
+    """
+
+    // f1: two "F" and three "M"; f2: "10", "20" and three NULLs; f3: three distinct country names.
+    sql "INSERT INTO ${tableName} values(1, \"F\", \"10\", \"China\"),(2, 
\"F\", \"20\", \"China\"),(3, \"M\", NULL, \"United States\"),(4, \"M\", NULL, 
\"United States\"),(5, \"M\", NULL, \"England\");"
+
+    // Single argument f1.
+    qt_select "select get_json_string(count_by_enum(f1), '\$.[0].cbe.F') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1), '\$.[0].cbe.M') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1), '\$.[0].notnull') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1), '\$.[0].null') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1), '\$.[0].all') from 
${tableName}"
+
+    // Single argument f2: it holds no "F"/"M" values, so the two cbe lookups are expected to be NULL.
+    qt_select "select get_json_string(count_by_enum(f2), '\$.[0].cbe.F') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f2), '\$.[0].cbe.M') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f2), '\$.[0].notnull') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f2), '\$.[0].null') from 
${tableName}"
+    qt_select "select get_json_string(count_by_enum(f2), '\$.[0].all') from 
${tableName}"
+
+    // Three arguments: array element [0] corresponds to f1.
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[0].cbe.F') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[0].cbe.M') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), 
'\$.[0].notnull') from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[0].null') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[0].all') 
from ${tableName}"
+
+    // Three arguments: array element [1] corresponds to f2.
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[1].cbe.F') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[1].cbe.M') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), 
'\$.[1].notnull') from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[1].null') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[1].all') 
from ${tableName}"
+
+    // Three arguments: array element [2] corresponds to f3.
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[2].cbe.F') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[2].cbe.M') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), 
'\$.[2].notnull') from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[2].null') 
from ${tableName}"
+    qt_select "select get_json_string(count_by_enum(f1,f2,f3), '\$.[2].all') 
from ${tableName}"
+
+    sql "DROP TABLE IF EXISTS ${tableName}"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch master updated: [Feature](count_by_enum) support count_by_enum function (#22071)

Reply via email to