This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 1cf7b34cf [CH] Fix left and substring with length -1 (#5943)
1cf7b34cf is described below

commit 1cf7b34cfb346d204c252410c98df142297c85c6
Author: LiuNeng <[email protected]>
AuthorDate: Tue Jun 4 16:39:58 2024 +0800

    [CH] Fix left and substring with length -1 (#5943)
    
    What changes were proposed in this pull request?
    Fix left and substring with length -1
    
    The expected result is an empty string.
    
    How was this patch tested?
    Unit tests.
    
    (No UI changes are involved in this patch.)
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 47 +++++++++++++++-------
 .../Parser/scalar_function_parser/substring.cpp    |  4 +-
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 038b170df..d2752a073 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -722,7 +722,8 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
   }
 
   test("test literals") {
-    val query = """
+    val query =
+      """
       SELECT
         CAST(NULL AS BOOLEAN) AS boolean_literal,
         CAST(1 AS TINYINT) AS tinyint_literal,
@@ -1321,9 +1322,10 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
     spark.sql("create table test_1767 (id bigint, data map<string, string>) 
using parquet")
     spark.sql("INSERT INTO test_1767 values(1, map('k', 'v'))")
 
-    val sql = """
-                | select id from test_1767 lateral view
-                | posexplode(split(data['k'], ',')) tx as a, b""".stripMargin
+    val sql =
+      """
+        | select id from test_1767 lateral view
+        | posexplode(split(data['k'], ',')) tx as a, b""".stripMargin
     
runQueryAndCompare(sql)(checkGlutenOperatorMatch[CHGenerateExecTransformer])
 
     spark.sql("drop table test_1767")
@@ -2082,21 +2084,23 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
   }
 
   test("GLUTEN-3149 convert Nan to int") {
-    val sql = """
-                | select cast(a as Int) as n from(
-                |   select cast(s as Float) as a from(
-                |     select if(n_name='ALGERIA', 'nan', '1.0') as s from 
nation
-                |   ))""".stripMargin
+    val sql =
+      """
+        | select cast(a as Int) as n from(
+        |   select cast(s as Float) as a from(
+        |     select if(n_name='ALGERIA', 'nan', '1.0') as s from nation
+        |   ))""".stripMargin
     compareResultsAgainstVanillaSpark(sql, true, { _ => })
   }
 
   test("GLUTEN-3149 convert Inf to int") {
-    val sql = """
-                | select n_regionkey, n is null, isnan(n),  cast(n as int) 
from (
-                |   select n_regionkey, x, n_regionkey/(x) as n from (
-                |     select n_regionkey, cast(n_nationkey as float) as x from 
 nation
-                |   )t1
-                | )t2""".stripMargin
+    val sql =
+      """
+        | select n_regionkey, n is null, isnan(n),  cast(n as int) from (
+        |   select n_regionkey, x, n_regionkey/(x) as n from (
+        |     select n_regionkey, cast(n_nationkey as float) as x from  nation
+        |   )t1
+        | )t2""".stripMargin
     compareResultsAgainstVanillaSpark(sql, true, { _ => })
   }
 
@@ -2564,6 +2568,19 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
     spark.sql("drop table test_tbl_5896")
   }
 
+  test("test left with len -1") {
+    val tbl_create_sql =
+      "create table test_left(col string) using parquet"
+    val tbl_insert_sql =
+      "insert into test_left values('test1'), ('test2')"
+    spark.sql(tbl_create_sql)
+    spark.sql(tbl_insert_sql)
+    compareResultsAgainstVanillaSpark("select left(col, -1) from test_left", 
true, { _ => })
+    compareResultsAgainstVanillaSpark("select left(col, -2) from test_left", 
true, { _ => })
+    compareResultsAgainstVanillaSpark("select substring(col, 0, -1) from 
test_left", true, { _ => })
+    spark.sql("drop table test_left")
+  }
+
   test("Inequal join support") {
     withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "-1")) {
       spark.sql("create table ineq_join_t1 (key bigint, value bigint) using 
parquet");
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
index 64c97da80..550e77344 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
@@ -59,7 +59,9 @@ public:
         const auto * const_one_node = addColumnToActionsDAG(actions_dag, 
index_type, 1);
         const auto * equals_zero_node = toFunctionNode(actions_dag, "equals", 
{index_arg, const_zero_node});
         const auto * if_node = toFunctionNode(actions_dag, "if", 
{equals_zero_node, const_one_node, index_arg});
-        const auto * substring_func_node = toFunctionNode(actions_dag, 
"substringUTF8", {str_arg, if_node, length_arg});
+        const auto * less_zero_node = toFunctionNode(actions_dag, "less", 
{length_arg, const_zero_node});
+        const auto * if_len_node = toFunctionNode(actions_dag, "if", 
{less_zero_node, const_zero_node, length_arg});
+        const auto * substring_func_node = toFunctionNode(actions_dag, 
"substringUTF8", {str_arg, if_node, if_len_node});
         return convertNodeTypeIfNeeded(substrait_func, substring_func_node, 
actions_dag);
     }
 protected:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to