This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d12c4a9da [GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values (#8256)
1d12c4a9da is described below

commit 1d12c4a9da53821a8136da2f49cdbbb6223066be
Author: Wenzheng Liu <[email protected]>
AuthorDate: Tue Dec 24 11:05:57 2024 +0800

    [GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values (#8256)
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 13 ++++++++++
 cpp-ch/local-engine/Parser/ExpressionParser.cpp    | 28 ++++++++++++++++------
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 5dca4ad347..6feb1e8806 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -3350,5 +3350,18 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
     spark.sql("drop table test_tbl_7759")
   }
+
+  test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
+    spark.sql("drop table if exists test_filter")
+    spark.sql("create table test_filter(c1 string, c2 string) using parquet")
+    spark.sql(s"""
+                 |insert into test_filter values
+                 |('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
+                 |('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
+                 |('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
+                 |""".stripMargin)
+    val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
+    compareResultsAgainstVanillaSpark(sql, true, { _ => })
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 91621b7931..ab4d8650d2 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -419,12 +419,11 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
             }
 
             DB::DataTypePtr elem_type;
-            std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
-            elem_type = wrapNullableType(nullable, elem_type);
-
-            DB::MutableColumnPtr elem_column = elem_type->createColumn();
-            elem_column->reserve(options_len);
-            for (int i = 0; i < options_len; ++i)
+            std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
+            auto first_option = LiteralParser::parse(options[0].literal());
+            elem_type = wrapNullableType(nullable, first_option.first);
+            options_type_and_field.emplace_back(std::move(first_option));
+            for (int i = 1; i < options_len; ++i)
             {
                 auto type_and_field = LiteralParser::parse(options[i].literal());
                 auto option_type = wrapNullableType(nullable, type_and_field.first);
@@ -434,8 +433,23 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
                         "SingularOrList options type mismatch:{} and {}",
                         elem_type->getName(),
                         option_type->getName());
+                options_type_and_field.emplace_back(std::move(type_and_field));
+            }
 
-                elem_column->insert(type_and_field.second);
+            // check tuple internal types
+            if (isTuple(elem_type) && isTuple(args[0]->result_type))
+            {
+                // Spark guarantees that the types of tuples in the 'in' filter are completely consistent.
+                // See org.apache.spark.sql.types.DataType#equalsStructurally
+                // Additionally, the mapping from Spark types to ClickHouse types is one-to-one, See TypeParser.cpp
+                // So we can directly use the first tuple type as the type of the tuple to avoid nullable mismatch
+                elem_type = args[0]->result_type;
+            }
+            DB::MutableColumnPtr elem_column = elem_type->createColumn();
+            elem_column->reserve(options_len);
+            for (int i = 0; i < options_len; ++i)
+            {
+                elem_column->insert(options_type_and_field[i].second);
             }
             auto name = getUniqueName("__set");
             ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to