This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 231f00c71 [CH] Support bit_length/octet_length function (#6259)
231f00c71 is described below

commit 231f00c71454837717b27374267149d52b5b9833
Author: exmy <[email protected]>
AuthorDate: Mon Jul 1 10:35:31 2024 +0800

    [CH] Support bit_length/octet_length function (#6259)
    
    What changes were proposed in this pull request?
    support bit_length/octet_length function
    
    move length impl to function parser
    
    These three functions may receive int type argument in spark ut, add a cast 
to string.
    
    How was this patch tested?
    PASS CI
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala |  1 -
 .../local-engine/Parser/SerializedPlanParser.cpp   |  9 ---
 cpp-ch/local-engine/Parser/SerializedPlanParser.h  |  3 +-
 .../Parser/scalar_function_parser/bitLength.cpp    | 65 +++++++++++++++++++
 .../Parser/scalar_function_parser/length.cpp       | 74 ++++++++++++++++++++++
 .../Parser/scalar_function_parser/octetLength.cpp  | 60 ++++++++++++++++++
 .../gluten/expression/ExpressionMappings.scala     |  1 +
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  4 +-
 .../spark/sql/GlutenStringFunctionsSuite.scala     |  7 +-
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  4 +-
 .../spark/sql/GlutenStringFunctionsSuite.scala     |  4 --
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  4 +-
 .../spark/sql/GlutenStringFunctionsSuite.scala     |  4 --
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  4 +-
 .../spark/sql/GlutenStringFunctionsSuite.scala     |  4 --
 .../apache/gluten/expression/ExpressionNames.scala |  1 +
 16 files changed, 215 insertions(+), 34 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index e9bee8439..14f0ff489 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -194,7 +194,6 @@ object CHExpressionUtil {
     URL_ENCODE -> DefaultValidator(),
     SKEWNESS -> DefaultValidator(),
     SOUNDEX -> DefaultValidator(),
-    BIT_LENGTH -> DefaultValidator(),
     MAKE_YM_INTERVAL -> DefaultValidator(),
     MAP_ZIP_WITH -> DefaultValidator(),
     ZIP_WITH -> DefaultValidator(),
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 325ec32dc..77819fd73 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -648,15 +648,6 @@ SerializedPlanParser::getFunctionName(const std::string & 
function_signature, co
         if (null_on_overflow)
             ch_function_name = ch_function_name + "OrNull";
     }
-    else if (function_name == "char_length")
-    {
-        /// In Spark
-        /// char_length returns the number of bytes when input is binary type, 
corresponding to CH length function
-        /// char_length returns the number of characters when input is string 
type, corresponding to CH char_length function
-        ch_function_name = SCALAR_FUNCTIONS.at(function_name);
-        if (function_signature.find("vbin") != std::string::npos)
-            ch_function_name = "length";
-    }
     else if (function_name == "reverse")
     {
         if (function.output_type().has_list())
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index 4a4a19af3..ad2b0d50e 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -130,8 +130,6 @@ static const std::map<std::string, std::string> 
SCALAR_FUNCTIONS
        {"ltrim", ""}, // trimRight or trimRightSpark, depends on argument size
        {"rtrim", ""}, // trimBoth or trimBothSpark, depends on argument size
        {"strpos", "positionUTF8"},
-       {"char_length",
-        "char_length"}, /// Notice: when input argument is binary type, 
corresponding ch function is length instead of char_length
        {"replace", "replaceAll"},
        {"regexp_replace", "replaceRegexpAll"},
        {"regexp_extract_all", "regexpExtractAllSpark"},
@@ -306,6 +304,7 @@ public:
     std::shared_ptr<DB::ActionsDAG> expressionsToActionsDAG(
         const std::vector<substrait::Expression> & expressions, const 
DB::Block & header, const DB::Block & read_schema);
     RelMetricPtr getMetric() { return metrics.empty() ? nullptr : 
metrics.at(0); }
+    const std::unordered_map<std::string, std::string> & getFunctionMapping() 
{ return function_mapping; }
 
     static std::string getFunctionName(const std::string & function_sig, const 
substrait::Expression_ScalarFunction & function);
 
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/bitLength.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/bitLength.cpp
new file mode 100644
index 000000000..9358c4578
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/bitLength.cpp
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/FunctionParser.h>
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+class FunctionParserBitLength : public FunctionParser
+{
+public:
+    explicit FunctionParserBitLength(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
+    ~FunctionParserBitLength() override = default;
+
+    static constexpr auto name = "bit_length";
+
+    String getName() const override { return name; }
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAGPtr & actions_dag) const override
+    {
+        // parse bit_length(a) as octet_length(a) * 8
+        auto parsed_args = parseFunctionArguments(substrait_func, "", 
actions_dag);
+        if (parsed_args.size() != 1)
+            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly one arguments", getName());
+
+        const auto * arg = parsed_args[0];
+        const auto * new_arg = arg;
+        if (isInt(DB::removeNullable(arg->result_type)))
+        {
+            const auto * string_type_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeString>(), "Nullable(String)");
+            new_arg = toFunctionNode(actions_dag, "CAST", {arg, 
string_type_node});
+        }
+
+        const auto * octet_length_node = toFunctionNode(actions_dag, 
"octet_length", {new_arg});
+        const auto * const_eight_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeInt32>(), 8);
+        const auto * result_node = toFunctionNode(actions_dag, "multiply", 
{octet_length_node, const_eight_node});
+
+        return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);;
+    }
+};
+
+static FunctionParserRegister<FunctionParserBitLength> register_bit_length;
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/length.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/length.cpp
new file mode 100644
index 000000000..85fe1f29a
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/length.cpp
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/FunctionParser.h>
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+class FunctionParserLength : public FunctionParser
+{
+public:
+    explicit FunctionParserLength(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
+    ~FunctionParserLength() override = default;
+
+    static constexpr auto name = "char_length";
+
+    String getName() const override { return name; }
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAGPtr & actions_dag) const override
+    {
+        /**
+            parse length(a) as
+                if input is binary type
+                    length(a) as length(a)
+                else
+                    length(a) as char_length(a)
+         */
+        auto parsed_args = parseFunctionArguments(substrait_func, "", 
actions_dag);
+        if (parsed_args.size() != 1)
+            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly one arguments", getName());
+
+        const auto * arg = parsed_args[0];
+        const auto * new_arg = arg;
+        if (isInt(removeNullable(arg->result_type)))
+        {
+            const auto * string_type_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeString>(), "Nullable(String)");
+            new_arg = toFunctionNode(actions_dag, "CAST", {arg, 
string_type_node});
+        }
+
+        auto function_signature = 
plan_parser->getFunctionMapping().at(std::to_string(substrait_func.function_reference()));
+        const ActionsDAG::Node * result_node;
+        if (function_signature.find("vbin") != std::string::npos)
+            result_node = toFunctionNode(actions_dag, "length", {new_arg});
+        else
+            result_node = toFunctionNode(actions_dag, "char_length", 
{new_arg});
+
+        return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);;
+    }
+};
+
+static FunctionParserRegister<FunctionParserLength> register_length;
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/octetLength.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/octetLength.cpp
new file mode 100644
index 000000000..52cbd0317
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/octetLength.cpp
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/FunctionParser.h>
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+class FunctionParserOctetLength : public FunctionParser
+{
+public:
+    explicit FunctionParserOctetLength(SerializedPlanParser * plan_parser_) : 
FunctionParser(plan_parser_) { }
+    ~FunctionParserOctetLength() override = default;
+
+    static constexpr auto name = "octet_length";
+
+    String getName() const override { return name; }
+
+    const ActionsDAG::Node * parse(const substrait::Expression_ScalarFunction 
& substrait_func, ActionsDAGPtr & actions_dag) const override
+    {
+        auto parsed_args = parseFunctionArguments(substrait_func, "", 
actions_dag);
+        if (parsed_args.size() != 1)
+            throw Exception(DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, 
"Function {} requires exactly one arguments", getName());
+
+        const auto * arg = parsed_args[0];
+        const auto * new_arg = arg;
+        if (isInt(DB::removeNullable(arg->result_type)))
+        {
+            const auto * string_type_node = addColumnToActionsDAG(actions_dag, 
std::make_shared<DataTypeString>(), "Nullable(String)");
+            new_arg = toFunctionNode(actions_dag, "CAST", {arg, 
string_type_node});
+        }
+        const auto * octet_length_node = toFunctionNode(actions_dag, 
"octet_length", {new_arg});
+        return convertNodeTypeIfNeeded(substrait_func, octet_length_node, 
actions_dag);;
+    }
+};
+
+static FunctionParserRegister<FunctionParserOctetLength> register_octet_length;
+}
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 678ba3817..806ec844d 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -101,6 +101,7 @@ object ExpressionMappings {
     Sig[Encode](ENCODE),
     Sig[Uuid](UUID),
     Sig[BitLength](BIT_LENGTH),
+    Sig[OctetLength](OCTET_LENGTH),
     Sig[Levenshtein](LEVENSHTEIN),
     Sig[UnBase64](UNBASE64),
     Sig[Base64](BASE64),
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 72185e8ce..60df3ee37 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -438,6 +438,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("string overlay function")
     .exclude("binary overlay function")
     .exclude("string parse_url function")
+    .exclude("string / binary length function")
+    .exclude("SPARK-36751: add octet length api for scala")
+    .exclude("SPARK-36751: add bit length api for scala")
   enableSuite[GlutenSubquerySuite]
     .exclude("SPARK-15370: COUNT bug in subquery in subquery in subquery")
     .exclude("SPARK-26893: Allow pushdown of partition pruning subquery 
filters to file source")
@@ -905,7 +908,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("LOCATE")
     .exclude("LPAD/RPAD")
     .exclude("REPEAT")
-    .exclude("length for string / binary")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string 
is not a valid url")
     .excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
index a686b6456..b88fdc59d 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
@@ -21,9 +21,4 @@ import 
org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper
 class GlutenStringFunctionsSuite
   extends StringFunctionsSuite
   with GlutenSQLTestsTrait
-  with ExpressionEvalHelper {
-
-  override def testNameBlackList: Seq[String] = super.testNameBlackList ++ Seq(
-    "string / binary length function"
-  )
-}
+  with ExpressionEvalHelper {}
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 2df825fc7..df9f49bfc 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -459,6 +459,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("string overlay function")
     .exclude("binary overlay function")
     .exclude("string parse_url function")
+    .exclude("string / binary length function")
+    .exclude("SPARK-36751: add octet length api for scala")
+    .exclude("SPARK-36751: add bit length api for scala")
   enableSuite[GlutenSubquerySuite]
     .exclude("SPARK-15370: COUNT bug in subquery in subquery in subquery")
     .exclude("SPARK-26893: Allow pushdown of partition pruning subquery 
filters to file source")
@@ -864,7 +867,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("translate")
     .exclude("LOCATE")
     .exclude("REPEAT")
-    .exclude("length for string / binary")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string 
is not a valid url")
   enableSuite[GlutenTryCastSuite]
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
index c58284e44..3d82e214f 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
@@ -30,10 +30,6 @@ class GlutenStringFunctionsSuite
 
   import testImplicits._
 
-  override def testNameBlackList: Seq[String] = super.testNameBlackList ++ Seq(
-    "string / binary length function"
-  )
-
   testGluten("string split function with no limit and regex pattern") {
     val df1 = Seq(("aaAbbAcc4")).toDF("a").select(split($"a", "A"))
     checkAnswer(df1, Row(Seq("aa", "bb", "cc4")))
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index edca8f528..0dc2cdd89 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -458,6 +458,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("string overlay function")
     .exclude("binary overlay function")
     .exclude("string parse_url function")
+    .exclude("string / binary length function")
+    .exclude("SPARK-36751: add octet length api for scala")
+    .exclude("SPARK-36751: add bit length api for scala")
   enableSuite[GlutenSubquerySuite]
     .exclude("SPARK-15370: COUNT bug in subquery in subquery in subquery")
     .exclude("SPARK-26893: Allow pushdown of partition pruning subquery 
filters to file source")
@@ -768,7 +771,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("translate")
     .exclude("LOCATE")
     .exclude("REPEAT")
-    .exclude("length for string / binary")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string 
is not a valid url")
   enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
index c58284e44..3d82e214f 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
@@ -30,10 +30,6 @@ class GlutenStringFunctionsSuite
 
   import testImplicits._
 
-  override def testNameBlackList: Seq[String] = super.testNameBlackList ++ Seq(
-    "string / binary length function"
-  )
-
   testGluten("string split function with no limit and regex pattern") {
     val df1 = Seq(("aaAbbAcc4")).toDF("a").select(split($"a", "A"))
     checkAnswer(df1, Row(Seq("aa", "bb", "cc4")))
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index edca8f528..0dc2cdd89 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -458,6 +458,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("string overlay function")
     .exclude("binary overlay function")
     .exclude("string parse_url function")
+    .exclude("string / binary length function")
+    .exclude("SPARK-36751: add octet length api for scala")
+    .exclude("SPARK-36751: add bit length api for scala")
   enableSuite[GlutenSubquerySuite]
     .exclude("SPARK-15370: COUNT bug in subquery in subquery in subquery")
     .exclude("SPARK-26893: Allow pushdown of partition pruning subquery 
filters to file source")
@@ -768,7 +771,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("translate")
     .exclude("LOCATE")
     .exclude("REPEAT")
-    .exclude("length for string / binary")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string 
is not a valid url")
   enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
index c58284e44..3d82e214f 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenStringFunctionsSuite.scala
@@ -30,10 +30,6 @@ class GlutenStringFunctionsSuite
 
   import testImplicits._
 
-  override def testNameBlackList: Seq[String] = super.testNameBlackList ++ Seq(
-    "string / binary length function"
-  )
-
   testGluten("string split function with no limit and regex pattern") {
     val df1 = Seq(("aaAbbAcc4")).toDF("a").select(split($"a", "A"))
     checkAnswer(df1, Row(Seq("aa", "bb", "cc4")))
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 87b1b4e75..7060e297e 100644
--- 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -127,6 +127,7 @@ object ExpressionNames {
   final val ENCODE = "encode"
   final val UUID = "uuid"
   final val BIT_LENGTH = "bit_length"
+  final val OCTET_LENGTH = "octet_length"
   final val LEVENSHTEIN = "levenshteinDistance"
   final val UNBASE64 = "unbase64"
   final val BASE64 = "base64"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to