This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1cf7b34cf [CH] Fix left and substring with length -1 (#5943)
1cf7b34cf is described below
commit 1cf7b34cfb346d204c252410c98df142297c85c6
Author: LiuNeng <[email protected]>
AuthorDate: Tue Jun 4 16:39:58 2024 +0800
[CH] Fix left and substring with length -1 (#5943)
What changes were proposed in this pull request?
Fix the `left` and `substring` functions when called with length -1:
the expected result is an empty string.
How was this patch tested?
unit tests
(If this patch involves UI changes, please attach a screenshot; otherwise,
remove this)
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 47 +++++++++++++++-------
.../Parser/scalar_function_parser/substring.cpp | 4 +-
2 files changed, 35 insertions(+), 16 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 038b170df..d2752a073 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -722,7 +722,8 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
}
test("test literals") {
- val query = """
+ val query =
+ """
SELECT
CAST(NULL AS BOOLEAN) AS boolean_literal,
CAST(1 AS TINYINT) AS tinyint_literal,
@@ -1321,9 +1322,10 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
spark.sql("create table test_1767 (id bigint, data map<string, string>)
using parquet")
spark.sql("INSERT INTO test_1767 values(1, map('k', 'v'))")
- val sql = """
- | select id from test_1767 lateral view
- | posexplode(split(data['k'], ',')) tx as a, b""".stripMargin
+ val sql =
+ """
+ | select id from test_1767 lateral view
+ | posexplode(split(data['k'], ',')) tx as a, b""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[CHGenerateExecTransformer])
spark.sql("drop table test_1767")
@@ -2082,21 +2084,23 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
}
test("GLUTEN-3149 convert Nan to int") {
- val sql = """
- | select cast(a as Int) as n from(
- | select cast(s as Float) as a from(
- | select if(n_name='ALGERIA', 'nan', '1.0') as s from
nation
- | ))""".stripMargin
+ val sql =
+ """
+ | select cast(a as Int) as n from(
+ | select cast(s as Float) as a from(
+ | select if(n_name='ALGERIA', 'nan', '1.0') as s from nation
+ | ))""".stripMargin
compareResultsAgainstVanillaSpark(sql, true, { _ => })
}
test("GLUTEN-3149 convert Inf to int") {
- val sql = """
- | select n_regionkey, n is null, isnan(n), cast(n as int)
from (
- | select n_regionkey, x, n_regionkey/(x) as n from (
- | select n_regionkey, cast(n_nationkey as float) as x from
nation
- | )t1
- | )t2""".stripMargin
+ val sql =
+ """
+ | select n_regionkey, n is null, isnan(n), cast(n as int) from (
+ | select n_regionkey, x, n_regionkey/(x) as n from (
+ | select n_regionkey, cast(n_nationkey as float) as x from nation
+ | )t1
+ | )t2""".stripMargin
compareResultsAgainstVanillaSpark(sql, true, { _ => })
}
@@ -2564,6 +2568,19 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
spark.sql("drop table test_tbl_5896")
}
+ test("test left with len -1") {
+ val tbl_create_sql =
+ "create table test_left(col string) using parquet"
+ val tbl_insert_sql =
+ "insert into test_left values('test1'), ('test2')"
+ spark.sql(tbl_create_sql)
+ spark.sql(tbl_insert_sql)
+ compareResultsAgainstVanillaSpark("select left(col, -1) from test_left",
true, { _ => })
+ compareResultsAgainstVanillaSpark("select left(col, -2) from test_left",
true, { _ => })
+ compareResultsAgainstVanillaSpark("select substring(col, 0, -1) from
test_left", true, { _ => })
+ spark.sql("drop table test_left")
+ }
+
test("Inequal join support") {
withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "-1")) {
spark.sql("create table ineq_join_t1 (key bigint, value bigint) using
parquet");
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
index 64c97da80..550e77344 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/substring.cpp
@@ -59,7 +59,9 @@ public:
const auto * const_one_node = addColumnToActionsDAG(actions_dag,
index_type, 1);
const auto * equals_zero_node = toFunctionNode(actions_dag, "equals",
{index_arg, const_zero_node});
const auto * if_node = toFunctionNode(actions_dag, "if",
{equals_zero_node, const_one_node, index_arg});
- const auto * substring_func_node = toFunctionNode(actions_dag,
"substringUTF8", {str_arg, if_node, length_arg});
+ const auto * less_zero_node = toFunctionNode(actions_dag, "less",
{length_arg, const_zero_node});
+ const auto * if_len_node = toFunctionNode(actions_dag, "if",
{less_zero_node, const_zero_node, length_arg});
+ const auto * substring_func_node = toFunctionNode(actions_dag,
"substringUTF8", {str_arg, if_node, if_len_node});
return convertNodeTypeIfNeeded(substrait_func, substring_func_node,
actions_dag);
}
protected:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]