This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 31bec06b718 branch-4.0: [Feature](func) Support function MAKE_SET
#56367 (#56524)
31bec06b718 is described below
commit 31bec06b718651df866690551bb724d20640c4df
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Sep 27 21:58:43 2025 +0800
branch-4.0: [Feature](func) Support function MAKE_SET #56367 (#56524)
Cherry-picked from #56367
Co-authored-by: linrrarity <[email protected]>
---
be/src/vec/functions/function_string.cpp | 1 +
be/src/vec/functions/function_string.h | 103 +++++++++++++++++++++
.../doris/catalog/BuiltinScalarFunctions.java | 4 +-
.../functions/executable/StringArithmetic.java | 20 ++++
.../expressions/functions/scalar/MakeSet.java | 78 ++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
.../string_functions/test_string_function.out | 37 ++++++++
.../string_functions/test_string_function.groovy | 51 ++++++++++
8 files changed, 298 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index f991a56e950..f6a94ce94b8 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1475,6 +1475,7 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionNgramSearch>();
factory.register_function<FunctionXPathString>();
factory.register_function<FunctionCrc32Internal>();
+ factory.register_function<FunctionMakeSet>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 2efdfe21a08..16e788843be 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -4959,6 +4959,109 @@ private:
}
};
+// MAKE_SET(bits, str1, str2, ...): returns the strings selected by the bit mask `bits`, joined
+// with ','. The lowest bit selects str1, the next bit selects str2, and so on. NULL strings are
+// skipped, set bits without a matching string argument are ignored, and the result is NULL when
+// `bits` is NULL.
+class FunctionMakeSet : public IFunction {
+public:
+    static constexpr auto name = "make_set";
+    static FunctionPtr create() { return std::make_shared<FunctionMakeSet>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+    // NULLs are handled by hand in execute_impl: only a NULL bit mask produces a NULL result,
+    // NULL string arguments are merely skipped.
+    bool use_default_implementation_for_nulls() const override { return false; }
+    // The result is nullable exactly when the bit-mask argument is nullable.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        if (arguments[0]->is_nullable()) {
+            return make_nullable(std::make_shared<DataTypeString>());
+        }
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const override {
+        auto res_col = ColumnString::create();
+        auto null_map = ColumnUInt8::create();
+
+        const auto& [bit_col, bit_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+
+        // Peel off a nullable wrapper (it may sit inside a constant column as well) so that the
+        // cast to ColumnInt64 below always targets the BIGINT payload.
+        const auto* bit_nullable = check_and_get_column<ColumnNullable>(bit_col.get());
+        const auto* bit_nested = bit_col.get();
+        if (bit_nullable != nullptr) {
+            bit_nested = &bit_nullable->get_nested_column();
+        }
+
+        if (bit_const) {
+            if (bit_col->is_null_at(0)) {
+                // Constant NULL mask: every row is NULL.
+                res_col->insert_many_defaults(input_rows_count);
+                null_map->insert_many_vals(1, input_rows_count);
+            } else {
+                // Keep the null map the same length as the result column; it is wrapped
+                // together with it below when the argument type is nullable.
+                null_map->get_data().resize_fill(input_rows_count, 0);
+                // Pass the column itself: vector_execute<true> reads the mask from row 0.
+                vector_execute<true>(block, arguments, input_rows_count, *res_col,
+                                     assert_cast<const ColumnInt64&>(*bit_nested),
+                                     null_map->get_data());
+            }
+        } else {
+            if (bit_nullable != nullptr) {
+                null_map->insert_range_from(bit_nullable->get_null_map_column(), 0,
+                                            input_rows_count);
+            } else {
+                null_map->get_data().resize_fill(input_rows_count, 0);
+            }
+            vector_execute<false>(block, arguments, input_rows_count, *res_col,
+                                  assert_cast<const ColumnInt64&>(*bit_nested),
+                                  null_map->get_data());
+        }
+
+        if (block.get_by_position(arguments[0]).type->is_nullable()) {
+            block.replace_by_position(
+                    result, ColumnNullable::create(std::move(res_col), std::move(null_map)));
+        } else {
+            block.replace_by_position(result, std::move(res_col));
+        }
+        return Status::OK();
+    }
+
+private:
+    // Appends one result string per row to `res_col`.
+    // bit_const == true : the mask is read once from row 0 of `bit_col` and reused for all rows
+    //                     (the caller has already dealt with a NULL constant mask).
+    // bit_const == false: the mask is read per row; rows flagged in `null_map` get an empty
+    //                     placeholder so that row offsets stay aligned.
+    template <bool bit_const>
+    void vector_execute(const Block& block, const ColumnNumbers& arguments, size_t input_rows_count,
+                        ColumnString& res_col, const ColumnInt64& bit_col,
+                        PaddedPODArray<uint8_t>& null_map) const {
+        if constexpr (bit_const) {
+            uint64_t bit = bit_col.get_element(0);
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                execute_one_row(block, arguments, res_col, bit, i);
+            }
+        } else {
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                if (null_map[i]) {
+                    res_col.insert_default();
+                    continue;
+                }
+                execute_one_row(block, arguments, res_col, bit_col.get_element(i), i);
+            }
+        }
+    }
+
+    // Builds the result for row `row_num`: walks the set bits of `bit` from lowest to highest
+    // and joins the matching non-NULL string arguments with ','.
+    void execute_one_row(const Block& block, const ColumnNumbers& arguments, ColumnString& res_col,
+                         uint64_t bit, size_t row_num) const {
+        static constexpr char SEPARATOR = ',';
+        // __builtin_ffsll returns the 1-based index of the lowest set bit (0 if none), which
+        // is also the index into `arguments` of the selected string (arguments[0] is the mask).
+        uint64_t pos = __builtin_ffsll(bit);
+        ColumnString::Chars data;
+        // Bits are visited in ascending order, so the first one that points past the last
+        // string argument ends the scan.
+        while (pos != 0 && pos < arguments.size() && bit != 0) {
+            const auto& col = block.get_by_position(arguments[pos]).column;
+            if (!col->is_null_at(row_num)) {
+                /* Here insert `str,` directly to support the case below:
+                 * SELECT MAKE_SET(3, '', 'a');
+                 * the expected result should be ',a'
+                 */
+                auto s_ref = col->get_data_at(row_num);
+                data.insert(s_ref.data, s_ref.data + s_ref.size);
+                data.push_back(SEPARATOR);
+            }
+            bit &= ~(1ULL << (pos - 1));
+            pos = __builtin_ffsll(bit);
+        }
+        // remove the last ','
+        if (!data.empty()) {
+            data.pop_back();
+        }
+        res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size());
+    }
+};
+
// ATTN: for debug only
// compute crc32 hash value as the same way in
`VOlapTablePartitionParam::find_tablets()`
class FunctionCrc32Internal : public IFunction {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 78c79114f0d..f7139bb563d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -306,6 +306,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim;
import org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeSet;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue;
@@ -1045,7 +1046,8 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(AISummarize.class, "ai_summarize"),
scalar(AISimilarity.class, "ai_similarity"),
scalar(Embed.class, "embed"),
- scalar(Uniform.class, "uniform"));
+ scalar(Uniform.class, "uniform"),
+ scalar(MakeSet.class, "make_set"));
public static final BuiltinScalarFunctions INSTANCE = new
BuiltinScalarFunctions();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
index 9faa122e19d..e04d4eca2b4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java
@@ -1121,4 +1121,24 @@ public class StringArithmetic {
return castStringLikeLiteral(first, result);
}
+
+    /**
+     * Executable arithmetic functions make_set: joins with ',' every string argument whose
+     * 0-based position corresponds to a set bit of {@code bitLiteral}, scanning from the
+     * lowest bit upwards. Set bits that point past the last argument are ignored.
+     */
+    @ExecFunction(name = "make_set")
+    public static Expression make_set(BigIntLiteral bitLiteral, StringLikeLiteral... args) {
+        long bit = bitLiteral.getValue();
+        final StringBuilder sb = new StringBuilder();
+        int pos = Long.numberOfTrailingZeros(bit);
+        // numberOfTrailingZeros yields 64 once no bit is left. Bits are visited in ascending
+        // order, so the first one past the argument list ends the scan.
+        while (pos != 64 && pos < args.length && bit != 0) {
+            sb.append(args[pos].getValue());
+            sb.append(',');
+            // The mask must be a long: with an int `1` the shift distance is taken modulo 32,
+            // so a bit at position >= 32 would never be cleared (endless loop), and clearing
+            // bit 31 would also wipe bits 32..63 because the int mask widens with zero high bits.
+            bit &= ~(1L << pos);
+            pos = Long.numberOfTrailingZeros(bit);
+        }
+        // drop the trailing ','
+        if (sb.length() != 0) {
+            sb.deleteCharAt(sb.length() - 1);
+        }
+        return castStringLikeLiteral(args[0], sb.toString());
+    }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
new file mode 100644
index 00000000000..60e66a746ce
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MakeSet.java
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ImplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'make_set'.
+ * Takes a BIGINT bit mask followed by one or more string arguments; both declared
+ * signatures are variadic and differ only in the string type (VARCHAR or STRING),
+ * which is also the return type.
+ * The result is nullable exactly when the first (bit mask) argument is nullable.
+ */
+public class MakeSet extends ScalarFunction implements
ImplicitlyCastableSignature {
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(BigIntType.INSTANCE,
VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(StringType.INSTANCE).varArgs(BigIntType.INSTANCE,
StringType.INSTANCE)
+ );
+
+ /**
+ * constructor with 2 or more arguments: arg0 is the bit mask, arg1 the first
+ * string, varArgs any further strings; all of them are merged into the
+ * argument list passed to the super constructor under the name "make_set".
+ */
+ public MakeSet(Expression arg0, Expression arg1, Expression... varArgs) {
+ super("make_set", ExpressionUtils.mergeArguments(arg0, arg1, varArgs));
+ }
+
+ /** constructor for withChildren and reuse signature; only called from within this class */
+ private MakeSet(ScalarFunctionParams functionParams) {
+ super(functionParams);
+ }
+
+ /**
+ * withChildren.
+ * Builds a new MakeSet over the given children; at least two children are
+ * required (checked with Preconditions.checkArgument).
+ */
+ @Override
+ public MakeSet withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() >= 2);
+ return new MakeSet(getFunctionParams(children));
+ }
+
+ @Override
+ public boolean nullable() {
+ return child(0).nullable();
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitMakeSet(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index f3077362cb8..7c948f7b9e9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -311,6 +311,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Lpad;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Ltrim;
import org.apache.doris.nereids.trees.expressions.functions.scalar.LtrimIn;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeDate;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.MakeSet;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue;
@@ -2565,4 +2566,8 @@ public interface ScalarFunctionVisitor<R, C> {
default R visitUniform(Uniform uniform, C context) {
return visitScalarFunction(uniform, context);
}
+
+ default R visitMakeSet(MakeSet makeSet, C context) {
+ return visitScalarFunction(makeSet, context); // default: no MakeSet-specific handling, use the generic scalar-function visit
+ }
}
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 433447d8fff..c34470a3ff4 100644
---
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
+++
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
@@ -677,3 +677,40 @@ true
-- !sql_random_bytes --
\N
+-- !mask_set_1 --
+apple
+blue
+dog,cat
+large
+hot
+tuesday,wednesday
+one,two,three
+
+test2,test3
+汽车,火车
+\N
+
+,should after ,
+first,third
+
+-- !mask_set_2 --
+apple
+blue
+dog,cat
+large
+hot
+tuesday,wednesday
+one,two,three
+
+test1
+自行车
+a,b
+
+,useless
+second,third
+
+-- !mask_set_3 --
+x2,x4,x26,x51,x62,x63,x64
+
+-- !mask_set_4 --
+,y6
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index 2aae6e1deac..9d8662a4f48 100644
---
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -429,4 +429,55 @@ suite("test_string_function", "arrow_flight_sql") {
sql("""select/*+SET_VAR(enable_fold_constant_by_be=true)*/ random(10)
from numbers("number" = "10");""")
contains "final projections: random(10)"
}
+
+ sql "DROP TABLE IF EXISTS test_make_set;"
+ sql"""CREATE TABLE test_make_set (
+ id int,
+ bit_num BIGINT,
+ vc1 VARCHAR(50),
+ vc2 VARCHAR(50),
+ vc3 VARCHAR(50)
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES ( 'replication_num' = '1' );"""
+
+ sql"""INSERT INTO test_make_set (id, bit_num, vc1, vc2, vc3) VALUES
+ (1, 1, 'apple', 'orange', NULL),
+ (2, 2, 'red', 'blue', NULL),
+ (3, 3, 'dog', 'cat', 'bird'),
+ (4, 4, 'small', 'medium', 'large'),
+ (5, 5, 'hot', 'warm', NULL),
+ (6, 6, 'monday', 'tuesday', 'wednesday'),
+ (7, 7, 'one', 'two', 'three'),
+ (8, 0, 'hello', 'world', NULL),
+ (9, -2, 'test1', 'test2', 'test3'),
+ (10, -3, '汽车', '自行车', '火车'),
+ (11, NULL, 'a', 'b', 'c'),
+ (12, 7, NULL, NULL, NULL),
+ (13, 3, '', 'should after ,', 'useless'),
+ (14, BIT_SHIFT_LEFT(1, 50) - 3, 'first', 'second', 'third');"""
+
+ qt_mask_set_1"""SELECT MAKE_SET(bit_num, vc1, vc2, vc3) FROM
test_make_set;"""
+ qt_mask_set_2"""SELECT MAKE_SET(id, vc1, vc2, vc3) FROM test_make_set;"""
+ qt_mask_set_3"""SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 63) + BIT_SHIFT_LEFT(1,
62) + BIT_SHIFT_LEFT(1, 61) + BIT_SHIFT_LEFT(1, 50) + BIT_SHIFT_LEFT(1, 25) +
BIT_SHIFT_LEFT(1, 3) + BIT_SHIFT_LEFT(1, 1),
'x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14','x15','x16','x17','x18','x19','x20','x21','x22','x23','x24','x25','x26','x27','x28','x29','x30','x31','x32','x33','x34','x35','x36','x37','x38','x39','x40','x41','x42','x43','x44','x45','x46','x47','x48','x49','x50',
[...]
+ qt_mask_set_4"""SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 62) + BIT_SHIFT_LEFT(1,
60) + BIT_SHIFT_LEFT(1, 58) + BIT_SHIFT_LEFT(1, 45) + BIT_SHIFT_LEFT(1, 5) +
BIT_SHIFT_LEFT(1, 2), 'y1', NULL, '', 'y4','y5','y6','y7','y8','y9','y10',
'y11','y12','y13','y14','y15','y16','y17','y18','y19','y20',
'y21','y22','y23','y24','y25','y26','y27','y28','y29','y30',
'y31','y32','y33','y34','y35','y36','y37','y38','y39','y40',
'y41','y42','y43','y44','y45',NULL,'y47','y48');"""
+
+ testFoldConst("SELECT MAKE_SET(1, 'Doris', 'Apache', 'Database');")
+ testFoldConst("SELECT MAKE_SET(2, 'hello', 'goodbye', 'world');")
+ testFoldConst("SELECT MAKE_SET(3, NULL, '你好', '世界');")
+ testFoldConst("SELECT MAKE_SET(-2, 'a', 'b', 'c');")
+ testFoldConst("SELECT MAKE_SET(NULL, 'a', 'b', 'c');")
+ testFoldConst("SELECT MAKE_SET(4, 'a', 'b', NULL);")
+ testFoldConst("SELECT MAKE_SET(4611686018427387903, 'a', 'b', 'c');")
+ testFoldConst("SELECT MAKE_SET(BIT_SHIFT_LEFT(1, 50) - 3, 'first',
'second', 'third');")
+ testFoldConst("SELECT MAKE_SET(3, '', 'a');")
+
+ test {
+ sql"""SELECT MAKE_SET(184467440737095516156, 'a', 'b', 'c');"""
+ exception "Can not find the compatibility function signature"
+ }
+
+ sql """DROP TABLE IF EXISTS test_make_set;"""
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]