This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new f112af0fd29 [pick](branch-2.1) pick #41555 #41592 #38204 (#41781)
f112af0fd29 is described below
commit f112af0fd29a1b8a9c4247a33b9b7556d822554d
Author: Gabriel <[email protected]>
AuthorDate: Mon Oct 14 14:05:08 2024 +0800
[pick](branch-2.1) pick #41555 #41592 #38204 (#41781)
pick #41555 #41592 #38204
---
be/src/pipeline/exec/aggregation_sink_operator.h | 2 +-
be/src/pipeline/exec/analytic_sink_operator.h | 2 +-
.../exec/distinct_streaming_aggregation_operator.h | 2 +-
be/src/pipeline/exec/hashjoin_build_sink.h | 2 +-
be/src/pipeline/exec/hashjoin_probe_operator.h | 2 +-
be/src/pipeline/exec/operator.h | 11 ++--
.../exec/partitioned_hash_join_probe_operator.h | 2 +-
.../exec/partitioned_hash_join_sink_operator.h | 2 +-
be/src/pipeline/exec/schema_scan_operator.cpp | 3 +
be/src/pipeline/exec/sort_sink_operator.h | 2 +-
be/src/pipeline/exec/union_sink_operator.h | 6 ++
be/src/pipeline/exec/union_source_operator.h | 5 ++
be/src/pipeline/pipeline_x/operator.h | 4 --
.../pipeline_x/pipeline_x_fragment_context.cpp | 43 +++++++------
be/src/vec/functions/function_string.cpp | 28 ++++++++-
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../trees/expressions/functions/scalar/Crc32.java | 71 ++++++++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 ++
gensrc/script/doris_builtins_functions.py | 2 +
.../string_functions/test_string_function_like.out | 12 ++++
.../test_string_function_like.groovy | 4 ++
21 files changed, 174 insertions(+), 38 deletions(-)
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h
b/be/src/pipeline/exec/aggregation_sink_operator.h
index de1f26057ff..e082d803bcb 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.h
+++ b/be/src/pipeline/exec/aggregation_sink_operator.h
@@ -164,7 +164,7 @@ public:
? DataDistribution(ExchangeType::PASSTHROUGH)
:
DataSinkOperatorX<AggSinkLocalState>::required_data_distribution();
}
- return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_join
+ return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_operator
? DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE,
_partition_exprs)
: DataDistribution(ExchangeType::HASH_SHUFFLE,
_partition_exprs);
}
diff --git a/be/src/pipeline/exec/analytic_sink_operator.h
b/be/src/pipeline/exec/analytic_sink_operator.h
index eb65414206c..0269ba15be0 100644
--- a/be/src/pipeline/exec/analytic_sink_operator.h
+++ b/be/src/pipeline/exec/analytic_sink_operator.h
@@ -102,7 +102,7 @@ public:
if (_partition_by_eq_expr_ctxs.empty()) {
return {ExchangeType::PASSTHROUGH};
} else if (_order_by_eq_expr_ctxs.empty()) {
- return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_join
+ return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_operator
?
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
: DataDistribution(ExchangeType::HASH_SHUFFLE,
_partition_exprs);
}
diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
index e2129925846..f661f773102 100644
--- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
+++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.h
@@ -107,7 +107,7 @@ public:
DataDistribution required_data_distribution() const override {
if (_needs_finalize || (!_probe_expr_ctxs.empty() &&
!_is_streaming_preagg)) {
- return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_join
+ return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_operator
?
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
: DataDistribution(ExchangeType::HASH_SHUFFLE,
_partition_exprs);
}
diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h
b/be/src/pipeline/exec/hashjoin_build_sink.h
index dd9383987ec..3b55dc5e44d 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -167,7 +167,7 @@ public:
bool require_shuffled_data_distribution() const override {
return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
!_is_broadcast_join;
}
- bool is_shuffled_hash_join() const override {
+ bool is_shuffled_operator() const override {
return _join_distribution == TJoinDistributionType::PARTITIONED;
}
bool require_data_distribution() const override {
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h
b/be/src/pipeline/exec/hashjoin_probe_operator.h
index f4b1a2c491f..3c75a8d10de 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -178,7 +178,7 @@ public:
bool require_shuffled_data_distribution() const override {
return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN &&
!_is_broadcast_join;
}
- bool is_shuffled_hash_join() const override {
+ bool is_shuffled_operator() const override {
return _join_distribution == TJoinDistributionType::PARTITIONED;
}
bool require_data_distribution() const override {
diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h
index 067790f873f..34beba600fc 100644
--- a/be/src/pipeline/exec/operator.h
+++ b/be/src/pipeline/exec/operator.h
@@ -254,10 +254,13 @@ public:
virtual Status revoke_memory(RuntimeState* state) { return Status::OK(); }
[[nodiscard]] virtual bool require_data_distribution() const { return
false; }
- [[nodiscard]] bool followed_by_shuffled_join() const { return
_followed_by_shuffled_join; }
- void set_followed_by_shuffled_join(bool followed_by_shuffled_join) {
- _followed_by_shuffled_join = followed_by_shuffled_join;
+ [[nodiscard]] bool followed_by_shuffled_operator() const {
+ return _followed_by_shuffled_operator;
}
+ void set_followed_by_shuffled_operator(bool followed_by_shuffled_operator)
{
+ _followed_by_shuffled_operator = followed_by_shuffled_operator;
+ }
+ [[nodiscard]] virtual bool is_shuffled_operator() const { return false; }
[[nodiscard]] virtual bool require_shuffled_data_distribution() const {
return false; }
protected:
@@ -268,7 +271,7 @@ protected:
OperatorXPtr _child_x = nullptr;
bool _is_closed;
- bool _followed_by_shuffled_join = false;
+ bool _followed_by_shuffled_operator = false;
};
/**
diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
index b19537741a8..04320177a20 100644
--- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
+++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h
@@ -172,7 +172,7 @@ public:
bool require_shuffled_data_distribution() const override {
return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
}
- bool is_shuffled_hash_join() const override {
+ bool is_shuffled_operator() const override {
return _join_distribution == TJoinDistributionType::PARTITIONED;
}
diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
index f3b36086799..2b69c143369 100644
--- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
+++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h
@@ -122,7 +122,7 @@ public:
bool require_shuffled_data_distribution() const override {
return _join_op != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
}
- bool is_shuffled_hash_join() const override {
+ bool is_shuffled_operator() const override {
return _join_distribution == TJoinDistributionType::PARTITIONED;
}
diff --git a/be/src/pipeline/exec/schema_scan_operator.cpp
b/be/src/pipeline/exec/schema_scan_operator.cpp
index d5353655ab0..73e54d52be2 100644
--- a/be/src/pipeline/exec/schema_scan_operator.cpp
+++ b/be/src/pipeline/exec/schema_scan_operator.cpp
@@ -285,6 +285,9 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state,
vectorized::Block* bl
} while (block->rows() == 0 && !*eos);
local_state.reached_limit(block, eos);
+ if (*eos) {
+ local_state._finish_dependency->set_always_ready();
+ }
return Status::OK();
}
diff --git a/be/src/pipeline/exec/sort_sink_operator.h
b/be/src/pipeline/exec/sort_sink_operator.h
index ae4823c55d0..d13c7abc44b 100644
--- a/be/src/pipeline/exec/sort_sink_operator.h
+++ b/be/src/pipeline/exec/sort_sink_operator.h
@@ -87,7 +87,7 @@ public:
Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos)
override;
DataDistribution required_data_distribution() const override {
if (_is_analytic_sort) {
- return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_join
+ return _is_colocate && _require_bucket_distribution &&
!_followed_by_shuffled_operator
?
DataDistribution(ExchangeType::BUCKET_HASH_SHUFFLE, _partition_exprs)
: DataDistribution(ExchangeType::HASH_SHUFFLE,
_partition_exprs);
} else if (_merge_by_exchange) {
diff --git a/be/src/pipeline/exec/union_sink_operator.h
b/be/src/pipeline/exec/union_sink_operator.h
index 97b704078c6..b2c1c414314 100644
--- a/be/src/pipeline/exec/union_sink_operator.h
+++ b/be/src/pipeline/exec/union_sink_operator.h
@@ -125,6 +125,12 @@ public:
}
}
+ bool require_shuffled_data_distribution() const override {
+ return _followed_by_shuffled_operator;
+ }
+
+ bool is_shuffled_operator() const override { return
_followed_by_shuffled_operator; }
+
private:
int _get_first_materialized_child_idx() const { return
_first_materialized_child_idx; }
diff --git a/be/src/pipeline/exec/union_source_operator.h
b/be/src/pipeline/exec/union_source_operator.h
index 60530521ec0..0c29374e690 100644
--- a/be/src/pipeline/exec/union_source_operator.h
+++ b/be/src/pipeline/exec/union_source_operator.h
@@ -135,6 +135,11 @@ public:
return Status::OK();
}
[[nodiscard]] int get_child_count() const { return _child_size; }
+ bool require_shuffled_data_distribution() const override {
+ return _followed_by_shuffled_operator;
+ }
+
+ bool is_shuffled_operator() const override { return
_followed_by_shuffled_operator; }
private:
bool _has_data(RuntimeState* state) const {
diff --git a/be/src/pipeline/pipeline_x/operator.h
b/be/src/pipeline/pipeline_x/operator.h
index 72f47b576f6..e32176a08d1 100644
--- a/be/src/pipeline/pipeline_x/operator.h
+++ b/be/src/pipeline/pipeline_x/operator.h
@@ -231,8 +231,6 @@ public:
[[nodiscard]] virtual bool can_terminate_early(RuntimeState* state) {
return false; }
- [[nodiscard]] virtual bool is_shuffled_hash_join() const { return false; }
-
bool can_read() override {
LOG(FATAL) << "should not reach here!";
return false;
@@ -627,8 +625,6 @@ public:
: DataDistribution(ExchangeType::NOOP);
}
- [[nodiscard]] virtual bool is_shuffled_hash_join() const { return false; }
-
Status close(RuntimeState* state) override {
return Status::InternalError("Should not reach here!");
}
diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
index ac527ed8e69..a3dff107f1b 100644
--- a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
+++ b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp
@@ -771,7 +771,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
ObjectPool* pool, const std::vector<TPlanNode>& tnodes,
const doris::TPipelineFragmentParams& request, const DescriptorTbl&
descs,
OperatorXPtr parent, int* node_idx, OperatorXPtr* root, PipelinePtr&
cur_pipe,
- int child_idx, const bool followed_by_shuffled_join) {
+ int child_idx, const bool followed_by_shuffled_operator) {
// propagate error case
if (*node_idx >= tnodes.size()) {
// TODO: print thrift msg
@@ -782,11 +782,11 @@ Status PipelineXFragmentContext::_create_tree_helper(
const TPlanNode& tnode = tnodes[*node_idx];
int num_children = tnodes[*node_idx].num_children;
- bool current_followed_by_shuffled_join = followed_by_shuffled_join;
+ bool current_followed_by_shuffled_operator = followed_by_shuffled_operator;
OperatorXPtr op = nullptr;
RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs,
op, cur_pipe,
parent == nullptr ? -1 :
parent->node_id(), child_idx,
- followed_by_shuffled_join));
+ followed_by_shuffled_operator));
// assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr));
if (parent != nullptr) {
@@ -797,7 +797,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
}
/**
- * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed
by a shuffled hash join.
+ * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed
by a shuffled operator (shuffled hash join, union operator followed by
co-located operators).
*
* For plan:
* LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2)
@@ -811,8 +811,8 @@ Status PipelineXFragmentContext::_create_tree_helper(
cur_pipe->operator_xs().empty()
? cur_pipe->sink_x()->require_shuffled_data_distribution()
: op->require_shuffled_data_distribution();
- current_followed_by_shuffled_join =
- (followed_by_shuffled_join || op->is_shuffled_hash_join()) &&
+ current_followed_by_shuffled_operator =
+ (followed_by_shuffled_operator || op->is_shuffled_operator()) &&
require_shuffled_data_distribution;
cur_pipe->_name.push_back('-');
@@ -823,7 +823,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
for (int i = 0; i < num_children; i++) {
++*node_idx;
RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op,
node_idx, nullptr,
- cur_pipe, i,
current_followed_by_shuffled_join));
+ cur_pipe, i,
current_followed_by_shuffled_operator));
// we are expecting a child, but have used all nodes
// this means we have been given a bad tree and must fail
@@ -865,13 +865,13 @@ Status PipelineXFragmentContext::_add_local_exchange_impl(
* `bucket_seq_to_instance_idx` is empty if no scan operator is contained
in this fragment.
* So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE`
instead of `BUCKET_HASH_SHUFFLE`.
*/
- const bool followed_by_shuffled_join =
- operator_xs.size() > idx ?
operator_xs[idx]->followed_by_shuffled_join()
- :
cur_pipe->sink_x()->followed_by_shuffled_join();
+ const bool followed_by_shuffled_operator =
+ operator_xs.size() > idx ?
operator_xs[idx]->followed_by_shuffled_operator()
+ :
cur_pipe->sink_x()->followed_by_shuffled_operator();
const bool should_disable_bucket_shuffle =
bucket_seq_to_instance_idx.empty() &&
shuffle_idx_to_instance_idx.find(-1) ==
shuffle_idx_to_instance_idx.end() &&
- followed_by_shuffled_join;
+ followed_by_shuffled_operator;
sink.reset(new LocalExchangeSinkOperatorX(
sink_id, local_exchange_id,
should_disable_bucket_shuffle ? _total_instances : _num_instances,
@@ -1047,7 +1047,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
const DescriptorTbl& descs,
OperatorXPtr& op,
PipelinePtr& cur_pipe, int
parent_idx,
int child_idx,
- const bool
followed_by_shuffled_join) {
+ const bool
followed_by_shuffled_operator) {
// We directly construct the operator from Thrift because the given array
is in the order of preorder traversal.
// Therefore, here we need to use a stack-like structure.
_pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx);
@@ -1121,7 +1121,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
op.reset(new DistinctStreamingAggOperatorX(pool,
next_operator_id(), tnode, descs,
_require_bucket_distribution));
RETURN_IF_ERROR(cur_pipe->add_operator(op));
- op->set_followed_by_shuffled_join(followed_by_shuffled_join);
+
op->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
_require_bucket_distribution =
_require_bucket_distribution ||
op->require_data_distribution();
} else if (tnode.agg_node.__isset.use_streaming_preaggregation &&
@@ -1152,7 +1152,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(),
tnode, descs,
_require_bucket_distribution));
}
- sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+
sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
_require_bucket_distribution =
_require_bucket_distribution ||
sink->require_data_distribution();
sink->set_dests_id({op->operator_id()});
@@ -1203,8 +1203,8 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
_pipeline_parent_map.push(op->node_id(), cur_pipe);
_pipeline_parent_map.push(op->node_id(), build_side_pipe);
- sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
- op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+
sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+ op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
} else {
op.reset(new HashJoinProbeOperatorX(pool, tnode,
next_operator_id(), descs));
RETURN_IF_ERROR(cur_pipe->add_operator(op));
@@ -1225,8 +1225,8 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
_pipeline_parent_map.push(op->node_id(), cur_pipe);
_pipeline_parent_map.push(op->node_id(), build_side_pipe);
- sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
- op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+
sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+ op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
}
_require_bucket_distribution =
_require_bucket_distribution ||
op->require_data_distribution();
@@ -1256,6 +1256,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
case TPlanNodeType::UNION_NODE: {
int child_count = tnode.num_children;
op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(),
descs));
+ op->set_followed_by_shuffled_operator(_require_bucket_distribution);
RETURN_IF_ERROR(cur_pipe->add_operator(op));
const auto downstream_pipeline_id = cur_pipe->id();
@@ -1298,7 +1299,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(),
tnode, descs,
_require_bucket_distribution));
}
- sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+ sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
_require_bucket_distribution =
_require_bucket_distribution ||
sink->require_data_distribution();
sink->set_dests_id({op->operator_id()});
@@ -1338,7 +1339,7 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
DataSinkOperatorXPtr sink;
sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(),
tnode, descs,
_require_bucket_distribution));
- sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+ sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
_require_bucket_distribution =
_require_bucket_distribution ||
sink->require_data_distribution();
sink->set_dests_id({op->operator_id()});
@@ -1349,11 +1350,13 @@ Status
PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
case TPlanNodeType::INTERSECT_NODE: {
RETURN_IF_ERROR(_build_operators_for_set_operation_node<true>(
pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+ op->set_followed_by_shuffled_operator(_require_bucket_distribution);
break;
}
case TPlanNodeType::EXCEPT_NODE: {
RETURN_IF_ERROR(_build_operators_for_set_operation_node<false>(
pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+ op->set_followed_by_shuffled_operator(_require_bucket_distribution);
break;
}
case TPlanNodeType::REPEAT_NODE: {
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index 7ebcac9ca1b..edf43300f94 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -82,7 +82,7 @@ struct NameQuoteImpl {
}
};
-struct NameStringLenght {
+struct NameStringLength {
static constexpr auto name = "length";
};
@@ -104,6 +104,28 @@ struct StringLengthImpl {
}
};
+struct NameCrc32 {
+ static constexpr auto name = "crc32";
+};
+
+struct Crc32Impl {
+ using ReturnType = DataTypeInt64;
+ static constexpr auto TYPE_INDEX = TypeIndex::String;
+ using Type = String;
+ using ReturnColumnType = ColumnVector<Int64>;
+
+ static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ PaddedPODArray<Int64>& res) {
+ auto size = offsets.size();
+ res.resize(size);
+ for (int i = 0; i < size; ++i) {
+ res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i
- 1],
+ offsets[i] - offsets[i - 1]);
+ }
+ return Status::OK();
+ }
+};
+
struct NameStringUtf8Length {
static constexpr auto name = "char_length";
};
@@ -1073,7 +1095,8 @@ using StringFindInSetImpl =
StringFunctionImpl<LeftDataType, RightDataType, Find
// ready for regist function
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
-using FunctionStringLength = FunctionUnaryToType<StringLengthImpl,
NameStringLenght>;
+using FunctionStringLength = FunctionUnaryToType<StringLengthImpl,
NameStringLength>;
+using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl,
NameStringUtf8Length>;
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
using FunctionStringStartsWith =
@@ -1111,6 +1134,7 @@ using FunctionStringRPad = FunctionStringPad<StringRPad>;
void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionStringASCII>();
factory.register_function<FunctionStringLength>();
+ factory.register_function<FunctionCrc32>();
factory.register_function<FunctionStringUTF8Length>();
factory.register_function<FunctionStringSpace>();
factory.register_function<FunctionStringStartsWith>();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 4b87fa37652..f84bda52178 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -122,6 +122,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateStruct;
@@ -723,6 +724,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(Least.class, "least"),
scalar(Left.class, "left"),
scalar(Length.class, "length"),
+ scalar(Crc32.class, "crc32"),
scalar(Like.class, "like"),
scalar(Ln.class, "ln"),
scalar(Locate.class, "locate"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
new file mode 100644
index 00000000000..036807062fa
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Crc32.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'crc32'. This class is generated by GenerateFunction.
+ */
+public class Crc32 extends ScalarFunction
+ implements UnaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(BigIntType.INSTANCE).args(StringType.INSTANCE)
+ );
+
+ /**
+ * constructor with 1 argument.
+ */
+ public Crc32(Expression arg) {
+ super("crc32", arg);
+ }
+
+ /**
+ * withChildren.
+ */
+ @Override
+ public Crc32 withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 1);
+ return new Crc32(children.get(0));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitCrc32(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index a2562baa758..79b8452e1df 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -129,6 +129,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateStruct;
@@ -1450,6 +1451,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(length, context);
}
+ default R visitCrc32(Crc32 crc32, C context) {
+ return visitScalarFunction(crc32, context);
+ }
+
default R visitLike(Like like, C context) {
return visitStringRegexPredicate(like, context);
}
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 4b5e113faee..a05f6ac8abb 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1574,6 +1574,7 @@ visible_functions = {
[['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'],
'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR',
'VARCHAR'], 'ALWAYS_NULLABLE'],
[['length'], 'INT', ['VARCHAR'], ''],
+ [['crc32'], 'BIGINT', ['VARCHAR'], ''],
[['bit_length'], 'INT', ['VARCHAR'], ''],
[['char_length', 'character_length'], 'INT', ['VARCHAR'], ''],
@@ -1639,6 +1640,7 @@ visible_functions = {
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'],
'ALWAYS_NULLABLE'],
[['length'], 'INT', ['STRING'], ''],
+ [['crc32'], 'BIGINT', ['STRING'], ''],
[['bit_length'], 'INT', ['STRING'], ''],
[['char_length', 'character_length'], 'INT', ['STRING'], ''],
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
index 51fbfc68af1..9fcfc2d6ee9 100644
---
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
+++
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_like.out
@@ -245,3 +245,15 @@ bb
-- !sql --
+-- !crc32_1 --
+348606243
+
+-- !crc32_2 --
+130583814
+
+-- !crc32_3 --
+2707236321
+
+-- !crc32_4 --
+\N
+
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
index 9c5deed8651..e092526b03c 100644
---
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_like.groovy
@@ -90,4 +90,8 @@ suite("test_string_function_like") {
qt_sql "SELECT k FROM ${tbName} WHERE NOT LIKE(k, \"%\") ORDER BY k;"
// sql "DROP TABLE ${tbName};"
+ qt_crc32_1 "select crc32(\"DORIS\");"
+ qt_crc32_2 "select crc32(\"APACHE DORIS\");"
+ qt_crc32_3 "select crc32(10);"
+ qt_crc32_4 "select crc32(NULL);"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]