This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1d12c4a9da [GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values (#8256)
1d12c4a9da is described below
commit 1d12c4a9da53821a8136da2f49cdbbb6223066be
Author: Wenzheng Liu <[email protected]>
AuthorDate: Tue Dec 24 11:05:57 2024 +0800
[GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values (#8256)
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 13 ++++++++++
cpp-ch/local-engine/Parser/ExpressionParser.cpp | 28 ++++++++++++++++------
2 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 5dca4ad347..6feb1e8806 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -3350,5 +3350,18 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
spark.sql("drop table test_tbl_7759")
}
+
+ test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
+ spark.sql("drop table if exists test_filter")
+ spark.sql("create table test_filter(c1 string, c2 string) using parquet")
+ spark.sql(s"""
+ |insert into test_filter values
+ |('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
+ |('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
+ |('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
+ |""".stripMargin)
+ val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
+ compareResultsAgainstVanillaSpark(sql, true, { _ => })
+ }
}
// scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 91621b7931..ab4d8650d2 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -419,12 +419,11 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
}
DB::DataTypePtr elem_type;
- std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
- elem_type = wrapNullableType(nullable, elem_type);
-
- DB::MutableColumnPtr elem_column = elem_type->createColumn();
- elem_column->reserve(options_len);
- for (int i = 0; i < options_len; ++i)
+ std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
+ auto first_option = LiteralParser::parse(options[0].literal());
+ elem_type = wrapNullableType(nullable, first_option.first);
+ options_type_and_field.emplace_back(std::move(first_option));
+ for (int i = 1; i < options_len; ++i)
{
auto type_and_field = LiteralParser::parse(options[i].literal());
auto option_type = wrapNullableType(nullable, type_and_field.first);
@@ -434,8 +433,23 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
"SingularOrList options type mismatch:{} and {}",
elem_type->getName(),
option_type->getName());
+ options_type_and_field.emplace_back(std::move(type_and_field));
+ }
- elem_column->insert(type_and_field.second);
+ // check tuple internal types
+ if (isTuple(elem_type) && isTuple(args[0]->result_type))
+ {
+ // Spark guarantees that the types of tuples in the 'in' filter are completely consistent.
+ // See org.apache.spark.sql.types.DataType#equalsStructurally
+ // Additionally, the mapping from Spark types to ClickHouse types is one-to-one, See TypeParser.cpp
+ // So we can directly use the first tuple type as the type of the tuple to avoid nullable mismatch
+ elem_type = args[0]->result_type;
+ }
+ DB::MutableColumnPtr elem_column = elem_type->createColumn();
+ elem_column->reserve(options_len);
+ for (int i = 0; i < options_len; ++i)
+ {
+ elem_column->insert(options_type_and_field[i].second);
}
auto name = getUniqueName("__set");
ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]