This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by
this push:
new 3d8a79efc7b Impl substr from zero (#49196)
3d8a79efc7b is described below
commit 3d8a79efc7b9e5ef2074db5d03b5594384811a66
Author: Socrates <[email protected]>
AuthorDate: Tue Mar 18 15:55:58 2025 +0800
Impl substr from zero (#49196)
---
be/src/vec/exec/scan/file_scanner.cpp | 2 +-
be/src/vec/functions/function_string.cpp | 9 +-
be/src/vec/functions/function_string.h | 35 +++++--
.../sink/writer/iceberg/partition_transformers.h | 2 +-
.../doris/catalog/BuiltinScalarFunctions.java | 8 +-
.../functions/scalar/SubstringForZero.java | 113 +++++++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
7 files changed, 158 insertions(+), 16 deletions(-)
diff --git a/be/src/vec/exec/scan/file_scanner.cpp
b/be/src/vec/exec/scan/file_scanner.cpp
index 62e03e3dc5d..21e0ea2f195 100644
--- a/be/src/vec/exec/scan/file_scanner.cpp
+++ b/be/src/vec/exec/scan/file_scanner.cpp
@@ -845,7 +845,7 @@ void FileScanner::_truncate_char_or_varchar_column(Block*
block, int idx, int le
temp_arguments[2] = num_columns_without_result + 1; // len
size_t result_column_id = num_columns_without_result + 2;
- SubstringUtil::substring_execute(*block, temp_arguments, result_column_id,
block->rows());
+ SubstringUtil<>::substring_execute(*block, temp_arguments,
result_column_id, block->rows());
auto res =
ColumnNullable::create(block->get_by_position(result_column_id).column,
null_map_column_ptr);
block->replace_by_position(idx, std::move(res));
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index d87eaaa3dde..be166baa5f1 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1195,8 +1195,10 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionTrim<Trim2Impl<true, false,
NameLTrimIn>>>();
factory.register_function<FunctionTrim<Trim2Impl<false, true,
NameRTrimIn>>>();
factory.register_function<FunctionConvertTo>();
- factory.register_function<FunctionSubstring<Substr3Impl>>();
- factory.register_function<FunctionSubstring<Substr2Impl>>();
+ factory.register_function<FunctionSubstring<Substr3Impl<false>>>();
+ factory.register_function<FunctionSubstring<Substr2Impl<false>>>();
+ factory.register_function<FunctionSubstring<Substr3Impl<true>>>();
+ factory.register_function<FunctionSubstring<Substr2Impl<true>>>();
factory.register_function<FunctionLeft>();
factory.register_function<FunctionRight>();
factory.register_function<FunctionNullOrEmpty>();
@@ -1242,7 +1244,8 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
- factory.register_alias(SubstringUtil::name, "substr");
+ factory.register_alias(SubstringUtil<>::name, "substr");
+ factory.register_alias(SubstringUtil<true>::name, "substr_for_zero");
factory.register_alias(FunctionToLower::name, "lcase");
factory.register_alias(FunctionToUpper::name, "ucase");
factory.register_alias(FunctionStringDigestOneArg<MD5Sum>::name, "md5");
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 3b909f4a8d5..b38ca7c50d9 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -162,8 +162,9 @@ struct StringOP {
}
};
+template <bool is_for_zero = false>
struct SubstringUtil {
- static constexpr auto name = "substring";
+ static constexpr auto name = is_for_zero ? "substring_for_zero" :
"substring";
static void substring_execute(Block& block, const ColumnNumbers&
arguments, uint32_t result,
size_t input_rows_count) {
@@ -220,7 +221,7 @@ private:
PMR::vector<size_t> index {&pool};
if constexpr (is_const) {
- if (start[0] == 0 || len[0] <= 0) {
+ if ((!is_for_zero && start[0] == 0) || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
@@ -237,11 +238,17 @@ private:
int char_len = simd::VStringFunctions::get_char_len(str_data,
str_size);
// return empty string if start > src.length
// Here, start_value is compared against the length of the
character.
- if (start_value > char_len || str_size == 0 || start_value == 0 ||
len_value <= 0) {
+ if (start_value > char_len || str_size == 0 || (!is_for_zero &&
start_value == 0) ||
+ len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
+ // Handle Hive compatibility mode - treat start=0 as start=1
+ if (is_for_zero && start_value == 0) {
+ start_value = 1;
+ }
+
size_t byte_pos = 0;
index.clear();
for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
@@ -287,7 +294,7 @@ private:
res_offsets.resize(size);
if constexpr (is_const) {
- if (start[0] == 0 || len[0] <= 0) {
+ if ((!is_for_zero && start[0] == 0) || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
@@ -305,6 +312,11 @@ private:
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];
+ // Handle Hive compatibility mode - treat start=0 as start=1
+ if (is_for_zero && start_value == 0) {
+ start_value = 1;
+ }
+
if (start_value > str_size || start_value < -str_size || str_size
== 0 ||
len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
@@ -616,7 +628,7 @@ private:
template <typename Impl>
class FunctionSubstring : public IFunction {
public:
- static constexpr auto name = SubstringUtil::name;
+ static constexpr auto name = Impl::name;
String get_name() const override { return name; }
static FunctionPtr create() { return
std::make_shared<FunctionSubstring<Impl>>(); }
@@ -636,7 +648,9 @@ public:
}
};
+template <bool is_for_zero = false>
struct Substr3Impl {
+ static constexpr auto name = SubstringUtil<is_for_zero>::name;
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeInt32>(),
std::make_shared<DataTypeInt32>()};
@@ -645,12 +659,14 @@ struct Substr3Impl {
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
- SubstringUtil::substring_execute(block, arguments, result,
input_rows_count);
+ SubstringUtil<is_for_zero>::substring_execute(block, arguments,
result, input_rows_count);
return Status::OK();
}
};
+template <bool is_for_zero = false>
struct Substr2Impl {
+ static constexpr auto name = SubstringUtil<is_for_zero>::name;
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeInt32>()};
}
@@ -679,7 +695,8 @@ struct Substr2Impl {
block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(),
"strlen"});
ColumnNumbers temp_arguments = {arguments[0], arguments[1],
block.columns() - 1};
- SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
+ SubstringUtil<is_for_zero>::substring_execute(block, temp_arguments,
result,
+ input_rows_count);
return Status::OK();
}
};
@@ -891,7 +908,7 @@ public:
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = arguments[1];
- SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
+ SubstringUtil<false>::substring_execute(block, temp_arguments, result,
input_rows_count);
return Status::OK();
}
};
@@ -940,7 +957,7 @@ public:
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = num_columns_without_result + 1;
- SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
+ SubstringUtil<>::substring_execute(block, temp_arguments, result,
input_rows_count);
return Status::OK();
}
};
diff --git a/be/src/vec/sink/writer/iceberg/partition_transformers.h
b/be/src/vec/sink/writer/iceberg/partition_transformers.h
index 0b18ce24952..cd1cfa6f94c 100644
--- a/be/src/vec/sink/writer/iceberg/partition_transformers.h
+++ b/be/src/vec/sink/writer/iceberg/partition_transformers.h
@@ -177,7 +177,7 @@ public:
temp_arguments[2] = 2; // width
uint32_t result_column_id = 3;
- SubstringUtil::substring_execute(temp_block, temp_arguments,
result_column_id,
+ SubstringUtil<>::substring_execute(temp_block, temp_arguments,
result_column_id,
temp_block.rows());
if (is_nullable) {
auto res_column = ColumnNullable::create(
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index c0409ae3489..0445cc3155e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -426,6 +426,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringForZero;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringIndex;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tan;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh;
@@ -493,7 +494,8 @@ import java.util.List;
/**
* Builtin scalar functions.
* <p>
- * Note: Please ensure that this class only has some lists and no procedural
code.
+ * Note: Please ensure that this class only has some lists and no procedural
+ * code.
* It helps to be clear and concise.
*/
public class BuiltinScalarFunctions implements FunctionHelper {
@@ -923,6 +925,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(SubBitmap.class, "sub_bitmap"),
scalar(SubReplace.class, "sub_replace"),
scalar(Substring.class, "substr", "substring"),
+ scalar(SubstringForZero.class, "substr_for_zero",
"substring_for_zero"),
scalar(SubstringIndex.class, "substring_index"),
scalar(Tan.class, "tan"),
scalar(Tanh.class, "tanh"),
@@ -991,5 +994,6 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
public static final BuiltinScalarFunctions INSTANCE = new
BuiltinScalarFunctions();
// Note: Do not add any code here!
- private BuiltinScalarFunctions() {}
+ private BuiltinScalarFunctions() {
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
new file mode 100644
index 00000000000..6c4f2994837
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubstringForZero.java
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * ScalarFunction 'substring_for_zero'. For compatibility with Hive.
+ */
+public class SubstringForZero extends ScalarFunction
+ implements ExplicitlyCastableSignature, PropagateNullable {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT,
IntegerType.INSTANCE),
+
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE,
IntegerType.INSTANCE),
+ FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+ .args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE,
IntegerType.INSTANCE),
+ FunctionSignature.ret(StringType.INSTANCE)
+ .args(StringType.INSTANCE, IntegerType.INSTANCE,
IntegerType.INSTANCE));
+
+ /**
+ * constructor with 2 arguments.
+ */
+ public SubstringForZero(Expression arg0, Expression arg1) {
+ super("substring_for_zero", arg0, arg1, Literal.of(Integer.MAX_VALUE));
+ }
+
+ /**
+ * constructor with 3 arguments.
+ */
+ public SubstringForZero(Expression arg0, Expression arg1, Expression arg2)
{
+ super("substring_for_zero", arg0, arg1, arg2);
+ }
+
+ @Override
+ public FunctionSignature computeSignature(FunctionSignature signature) {
+ Optional<Expression> length = arity() == 3
+ ? Optional.of(getArgument(2))
+ : Optional.empty();
+ DataType returnType = VarcharType.SYSTEM_DEFAULT;
+ if (length.isPresent() && length.get() instanceof IntegerLiteral) {
+ returnType = VarcharType.createVarcharType(((IntegerLiteral)
length.get()).getValue());
+ }
+ return signature.withReturnType(returnType);
+ }
+
+ public Expression getSource() {
+ return child(0);
+ }
+
+ public Expression getPosition() {
+ return child(1);
+ }
+
+ public Optional<Expression> getLength() {
+ return arity() == 3 ? Optional.of(child(2)) : Optional.empty();
+ }
+
+ /**
+ * withChildren.
+ */
+ @Override
+ public SubstringForZero withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 2
+ || children.size() == 3);
+ if (children.size() == 2) {
+ return new SubstringForZero(children.get(0), children.get(1));
+ } else {
+ return new SubstringForZero(children.get(0), children.get(1),
children.get(2));
+ }
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitSubstringForZero(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index af8740047e6..02aa54e17b2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -424,6 +424,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringForZero;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SubstringIndex;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tan;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh;
@@ -2071,6 +2072,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(substring, context);
}
+ default R visitSubstringForZero(SubstringForZero substringForZero, C
context) {
+ return visitScalarFunction(substringForZero, context);
+ }
+
default R visitSubstringIndex(SubstringIndex substringIndex, C context) {
return visitScalarFunction(substringIndex, context);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]