This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 31bec06b718 branch-4.0: [Feature](func) Support function MAKE_SET 
#56367 (#56524)
31bec06b718 is described below

commit 31bec06b718651df866690551bb724d20640c4df
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Sep 27 21:58:43 2025 +0800

    branch-4.0: [Feature](func) Support function MAKE_SET #56367 (#56524)
    
    Cherry-picked from #56367
    
    Co-authored-by: linrrarity <[email protected]>
---
 be/src/vec/functions/function_string.cpp           |   1 +
 be/src/vec/functions/function_string.h             | 103 +++++++++++++++++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |   4 +-
 .../functions/executable/StringArithmetic.java     |  20 ++++
 .../expressions/functions/scalar/MakeSet.java      |  78 ++++++++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |   5 +
 .../string_functions/test_string_function.out      |  37 ++++++++
 .../string_functions/test_string_function.groovy   |  51 ++++++++++
 8 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index f991a56e950..f6a94ce94b8 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1475,6 +1475,7 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionNgramSearch>();
     factory.register_function<FunctionXPathString>();
     factory.register_function<FunctionCrc32Internal>();
+    factory.register_function<FunctionMakeSet>();
 
     factory.register_alias(FunctionLeft::name, "strleft");
     factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 2efdfe21a08..16e788843be 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -4959,6 +4959,109 @@ private:
     }
 };
 
+/// SQL function `make_set(bits, str1, str2, ...)`.
+///
+/// Argument 0 is read as an Int64 bit mask. Bit `k` of the mask (bit 0 is the least significant
+/// one) selects argument `k + 1`. The selected values of the current row are concatenated in
+/// ascending bit order, separated by ','. Selected arguments that are NULL for the row are
+/// skipped, and set bits without a matching argument are ignored.
+/// The result is NULL for rows whose bit mask is NULL; the result type is nullable exactly when
+/// the type of argument 0 is nullable.
+class FunctionMakeSet : public IFunction {
+public:
+    static constexpr auto name = "make_set";
+    static FunctionPtr create() { return std::make_shared<FunctionMakeSet>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+    // NULL handling is done by hand in execute_impl(): only a NULL bit mask makes the result
+    // NULL, NULL string arguments are merely skipped.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    /// Returns String, wrapped in Nullable when the bit mask argument is nullable.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        if (arguments[0].get()->is_nullable()) {
+            return make_nullable(std::make_shared<DataTypeString>());
+        }
+        return std::make_shared<DataTypeString>();
+    }
+
+    /// Builds the result column for `input_rows_count` rows and stores it at position `result`.
+    /// Always returns Status::OK().
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const override {
+        auto res_col = ColumnString::create();
+        auto null_map = ColumnUInt8::create();
+
+        const auto& [bit_col, bit_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+
+        if (bit_const) {
+            if (bit_col->is_null_at(0)) {
+                // Constant NULL mask: every row of the result is NULL.
+                res_col->insert_many_defaults(input_rows_count);
+                null_map->insert_many_vals(1, input_rows_count);
+            } else {
+                // A constant of nullable type carries its value inside a ColumnNullable; unwrap
+                // it before reading the Int64 payload.
+                const auto* mask_col = bit_col.get();
+                if (const auto* const_nullable = check_and_get_column<ColumnNullable>(mask_col)) {
+                    mask_col = &const_nullable->get_nested_column();
+                }
+                const uint64_t mask = assert_cast<const ColumnInt64*>(mask_col)->get_element(0);
+                // The null map must have one entry per result row, because it becomes part of
+                // the ColumnNullable built below when the mask type is nullable.
+                null_map->get_data().resize_fill(input_rows_count, 0);
+                for (size_t i = 0; i < input_rows_count; ++i) {
+                    execute_one_row(block, arguments, *res_col, mask, i);
+                }
+            }
+        } else if (const auto* nullable = check_and_get_column<ColumnNullable>(bit_col.get())) {
+            null_map->insert_range_from(nullable->get_null_map_column(), 0, input_rows_count);
+            vector_execute(block, arguments, input_rows_count, *res_col,
+                           assert_cast<const ColumnInt64&>(nullable->get_nested_column()),
+                           null_map->get_data());
+        } else {
+            null_map->get_data().resize_fill(input_rows_count, 0);
+            vector_execute(block, arguments, input_rows_count, *res_col,
+                           assert_cast<const ColumnInt64&>(*bit_col.get()), null_map->get_data());
+        }
+
+        if (block.get_by_position(arguments[0]).type.get()->is_nullable()) {
+            block.replace_by_position(
+                    result, ColumnNullable::create(std::move(res_col), std::move(null_map)));
+        } else {
+            block.replace_by_position(result, std::move(res_col));
+        }
+        return Status::OK();
+    }
+
+private:
+    /// Evaluates all rows of a non-constant bit mask column.
+    /// Rows flagged in `null_map` receive a default (empty) placeholder so that `res_col` stays
+    /// aligned with the null map.
+    void vector_execute(const Block& block, const ColumnNumbers& arguments, size_t input_rows_count,
+                        ColumnString& res_col, const ColumnInt64& bit_col,
+                        const PaddedPODArray<uint8_t>& null_map) const {
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                res_col.insert_default();
+                continue;
+            }
+            execute_one_row(block, arguments, res_col, bit_col.get_element(i), i);
+        }
+    }
+
+    /// Appends the make_set result for row `row_num` and bit mask `bit` to `res_col`.
+    void execute_one_row(const Block& block, const ColumnNumbers& arguments, ColumnString& res_col,
+                         uint64_t bit, size_t row_num) const {
+        static constexpr char SEPARATOR = ',';
+        // __builtin_ffsll() yields the 1-based index of the lowest set bit (0 when no bit is
+        // set), which is directly the index of the selected entry in `arguments`.
+        uint64_t pos = __builtin_ffsll(bit);
+        ColumnString::Chars data;
+        // Bits are visited in ascending order, so the first bit without a matching argument
+        // ends the scan: all remaining bits are higher and have no argument either.
+        while (pos != 0 && pos < arguments.size()) {
+            // Bind by reference: copying the column pointer for every bit of every row would
+            // only add reference-count traffic.
+            const auto& col = block.get_by_position(arguments[pos]).column;
+            if (!col->is_null_at(row_num)) {
+                /* Append `str,` unconditionally to support the case below:
+                 * SELECT MAKE_SET(3, '', 'a');
+                 * the expected result is ',a'
+                 */
+                auto s_ref = col->get_data_at(row_num);
+                data.insert(s_ref.data, s_ref.data + s_ref.size);
+                data.push_back(SEPARATOR);
+            }
+            bit &= ~(1ULL << (pos - 1));
+            pos = __builtin_ffsll(bit);
+        }
+        // remove the trailing ','
+        if (!data.empty()) {
+            data.pop_back();
+        }
+        res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size());
+    }
+};
+
 // ATTN: for debug only
 // compute crc32 hash value as the same way in 
`VOlapTablePartitionParam::find_tablets()`
 class FunctionCrc32Internal : public IFunction {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 78c79114f0d..f7139bb563d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -306,6 +306,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeSet;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue;
@@ -1045,7 +1046,8 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(AISummarize.class, "ai_summarize"),
             scalar(AISimilarity.class, "ai_similarity"),
             scalar(Embed.class, "embed"),
-            scalar(Uniform.class, "uniform"));
+            scalar(Uniform.class, "uniform"),
+            scalar(MakeSet.class, "make_set"));
 
     public static final BuiltinScalarFunctions INSTANCE = new 
BuiltinScalarFunctions();
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index 9faa122e19d..e04d4eca2b4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -1121,4 +1121,24 @@ public class StringArithmetic {
 
         return castStringLikeLiteral(first, result);
     }
+
+    /**
+     * Constant folding for make_set(bits, str1, str2, ...).
+     *
+     * <p>Bit k of {@code bitLiteral} (bit 0 is the least significant one) selects {@code args[k]}.
+     * The selected values are joined with ',' in ascending bit order; set bits without a matching
+     * argument are ignored. An empty string is returned when nothing is selected.
+     *
+     * @param bitLiteral the bit mask
+     * @param args the candidate strings; args[0] is also used to pick the literal type of the result
+     * @return a string-like literal holding the joined values
+     */
+    @ExecFunction(name = "make_set")
+    public static Expression make_set(BigIntLiteral bitLiteral, StringLikeLiteral... args) {
+        long bit = bitLiteral.getValue();
+        final StringBuilder sb = new StringBuilder();
+        // numberOfTrailingZeros() returns 64 once no bit is left in the mask.
+        int pos = Long.numberOfTrailingZeros(bit);
+        while (pos != 64 && pos < args.length) {
+            sb.append(args[pos].getValue());
+            sb.append(',');
+            // The mask has to be a long. With an int `1 << pos` the shift count wraps at 32, so
+            // bits 32..63 would never be cleared (endless loop), and `~(1 << 31)` would
+            // sign-extend to a mask that wipes bits 32..63.
+            bit &= ~(1L << pos);
+            pos = Long.numberOfTrailingZeros(bit);
+        }
+        // drop the trailing ','
+        if (sb.length() != 0) {
+            sb.deleteCharAt(sb.length() - 1);
+        }
+        // NOTE(review): args[0] assumes at least one string argument; the MakeSet constructor
+        // requires two children, confirm no other caller reaches this with an empty varargs.
+        return castStringLikeLiteral(args[0], sb.toString());
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
new file mode 100644
index 00000000000..60e66a746ce
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ImplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * Nereids scalar function {@code make_set(bits, str1[, str2, ...])}.
+ *
+ * <p>The first argument is a BIGINT, followed by one or more VARCHAR or STRING arguments; the
+ * return type is the same string type as those arguments.
+ */
+public class MakeSet extends ScalarFunction implements ImplicitlyCastableSignature {
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(BigIntType.INSTANCE, VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(StringType.INSTANCE).varArgs(BigIntType.INSTANCE, StringType.INSTANCE)
+    );
+
+    /**
+     * Creates the function from the bit mask, the first string and any further strings.
+     */
+    public MakeSet(Expression arg0, Expression arg1, Expression... varArgs) {
+        super("make_set", ExpressionUtils.mergeArguments(arg0, arg1, varArgs));
+    }
+
+    /** Used by withChildren to rebuild the function from existing function params. */
+    private MakeSet(ScalarFunctionParams functionParams) {
+        super(functionParams);
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    /**
+     * The result is nullable exactly when the first child (the bit mask) is nullable.
+     */
+    @Override
+    public boolean nullable() {
+        return child(0).nullable();
+    }
+
+    /**
+     * Rebuilds the function over new children; at least two children are required.
+     */
+    @Override
+    public MakeSet withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() > 1);
+        return new MakeSet(getFunctionParams(children));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitMakeSet(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index f3077362cb8..7c948f7b9e9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -311,6 +311,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeSet;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue;
@@ -2565,4 +2566,8 @@ public interface ScalarFunctionVisitor<R, C> {
     default R visitUniform(Uniform uniform, C context) {
         return visitScalarFunction(uniform, context);
     }
+
+    /** Visits a {@code make_set} call; the default forwards to {@code visitScalarFunction}. */
+    default R visitMakeSet(MakeSet makeSet, C context) {
+        return visitScalarFunction(makeSet, context);
+    }
 }
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 433447d8fff..c34470a3ff4 100644
--- 
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -677,3 +677,40 @@ true
 -- !sql_random_bytes --
 \N
 
+-- !mask_set_1 --
+apple
+blue
+dog,cat
+large
+hot
+tuesday,wednesday
+one,two,three
+
+test2,test3
+汽车,火车
+\N
+
+,should after ,
+first,third
+
+-- !mask_set_2 --
+apple
+blue
+dog,cat
+large
+hot
+tuesday,wednesday
+one,two,three
+
+test1
+自行车
+a,b
+
+,useless
+second,third
+
+-- !mask_set_3 --
+x2,x4,x26,x51,x62,x63,x64
+
+-- !mask_set_4 --
+,y6
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index 2aae6e1deac..9d8662a4f48 100644
--- 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -429,4 +429,55 @@ suite("test_string_function", "arrow_flight_sql") {
         sql("""select/*+SET_VAR(enable_fold_constant_by_be=true)*/ random(10) 
from numbers("number" = "10");""")
         contains "final projections: random(10)"
     }
+
+    sql "DROP TABLE IF EXISTS test_make_set;"
+    sql"""CREATE TABLE test_make_set (
+        id int,
+        bit_num BIGINT,
+        vc1 VARCHAR(50),
+        vc2 VARCHAR(50),
+        vc3 VARCHAR(50)
+    )
+    DUPLICATE KEY(id)
+    DISTRIBUTED BY HASH(id) BUCKETS 1
+    PROPERTIES ( 'replication_num' = '1' );"""
+
+    sql"""INSERT INTO test_make_set (id, bit_num, vc1, vc2, vc3) VALUES
+    (1, 1, 'apple', 'orange', NULL),
+    (2, 2, 'red', 'blue', NULL),
+    (3, 3, 'dog', 'cat', 'bird'),
+    (4, 4, 'small', 'medium', 'large'),
+    (5, 5, 'hot', 'warm', NULL),
+    (6, 6, 'monday', 'tuesday', 'wednesday'),
+    (7, 7, 'one', 'two', 'three'),
+    (8, 0, 'hello', 'world', NULL),
+    (9, -2, 'test1', 'test2', 'test3'),
+    (10, -3, '汽车', '自行车', '火车'),
+    (11, NULL, 'a', 'b', 'c'),
+    (12, 7, NULL, NULL, NULL),
+    (13, 3, '', 'should after ,', 'useless'),
+    (14, BIT_SHIFT_LEFT(1, 50) - 3, 'first', 'second', 'third');"""
+
+    qt_mask_set_1"""SELECT MAKE_SET(bit_num, vc1, vc2, vc3) FROM 
test_make_set;"""
+    qt_mask_set_2"""SELECT MAKE_SET(id, vc1, vc2, vc3) FROM test_make_set;"""
+    qt_mask_set_3"""SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 63) + BIT_SHIFT_LEFT(1, 
62) + BIT_SHIFT_LEFT(1, 61) + BIT_SHIFT_LEFT(1, 50) + BIT_SHIFT_LEFT(1, 25) + 
BIT_SHIFT_LEFT(1, 3) + BIT_SHIFT_LEFT(1, 1), 
'x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14','x15','x16','x17','x18','x19','x20','x21','x22','x23','x24','x25','x26','x27','x28','x29','x30','x31','x32','x33','x34','x35','x36','x37','x38','x39','x40','x41','x42','x43','x44','x45','x46','x47','x48','x49','x50',
 [...]
+    qt_mask_set_4"""SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 62) + BIT_SHIFT_LEFT(1, 
60) + BIT_SHIFT_LEFT(1, 58) + BIT_SHIFT_LEFT(1, 45) + BIT_SHIFT_LEFT(1, 5) + 
BIT_SHIFT_LEFT(1, 2), 'y1', NULL, '', 'y4','y5','y6','y7','y8','y9','y10', 
'y11','y12','y13','y14','y15','y16','y17','y18','y19','y20', 
'y21','y22','y23','y24','y25','y26','y27','y28','y29','y30', 
'y31','y32','y33','y34','y35','y36','y37','y38','y39','y40', 
'y41','y42','y43','y44','y45',NULL,'y47','y48');"""
+
+    testFoldConst("SELECT MAKE_SET(1, 'Doris', 'Apache', 'Database');")
+    testFoldConst("SELECT MAKE_SET(2, 'hello', 'goodbye', 'world');")
+    testFoldConst("SELECT MAKE_SET(3, NULL, '你好', '世界');")
+    testFoldConst("SELECT MAKE_SET(-2, 'a', 'b', 'c');")
+    testFoldConst("SELECT MAKE_SET(NULL, 'a', 'b', 'c');")
+    testFoldConst("SELECT MAKE_SET(4, 'a', 'b', NULL);")
+    testFoldConst("SELECT MAKE_SET(4611686018427387903, 'a', 'b', 'c');")
+    testFoldConst("SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 50) - 3, 'first', 
'second', 'third');")
+    testFoldConst("SELECT MAKE_SET(3, '', 'a');")
+
+    test {
+        sql"""SELECT MAKE_SET(184467440737095516156, 'a', 'b', 'c');"""
+        exception "Can not find the compatibility function signature"
+    }
+
+    sql """DROP TABLE IF EXISTS test_make_set;"""
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to