This is an automated email from the ASF dual-hosted git repository.
wangzhen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 79db240513 [GLUTEN-7652][VL] Support binary as string (#9325)
79db240513 is described below
commit 79db240513bf4a9e3807f0743c680e83aa0da0cd
Author: Zhen Wang <[email protected]>
AuthorDate: Wed Apr 16 18:40:21 2025 +0800
[GLUTEN-7652][VL] Support binary as string (#9325)
* [GLUTEN-7652][VL] Set data columns for hive HiveTableHandle
* fix format
* fix
* fix
* fix
* fix style
* fix
* fix style
* address comment
* replace to_binary with cast
---
.../org/apache/gluten/execution/VeloxScanSuite.scala | 20 ++++++++++++++++++++
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 16 +++++++++++-----
cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc | 12 ++++++++++--
.../sql/hive/execution/GlutenHiveSQLQuerySuite.scala | 1 -
4 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
index 525eca9407..f750ff65bd 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
@@ -22,6 +22,7 @@ import org.apache.gluten.config.GlutenConfig
import org.apache.gluten.utils.VeloxFileSystemValidationJniWrapper
import org.apache.spark.SparkConf
+import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GreaterThan
import org.apache.spark.sql.execution.ScalarSubquery
import org.apache.spark.sql.internal.SQLConf
@@ -187,4 +188,23 @@ class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
}
}
}
+
+ test("test binary as string") {
+ withTempDir {
+ dir =>
+ val path = dir.getCanonicalPath
+ spark
+ .range(2)
+ .selectExpr("id as a", "cast(cast(id + 10 as string) as binary) as b")
+ .write
+ .mode("overwrite")
+ .parquet(path)
+
+ withTable("test") {
+ sql("create table test (a long, b string) using parquet options (path '" + path + "')")
+ val df = sql("select b from test group by b order by b")
+ checkAnswer(df, Seq(Row("10"), Row("11")))
+ }
+ }
+ }
}
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 909d00e5e8..1304cb511b 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -1292,18 +1292,24 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
// Velox requires Filter Pushdown must being enabled.
bool filterPushdownEnabled = true;
+ auto names = colNameList;
+ auto types = veloxTypeList;
+ auto dataColumns = ROW(std::move(names), std::move(types));
std::shared_ptr<connector::hive::HiveTableHandle> tableHandle;
if (!readRel.has_filter()) {
tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
- kHiveConnectorId, "hive_table", filterPushdownEnabled,
common::SubfieldFilters{}, nullptr);
+ kHiveConnectorId, "hive_table", filterPushdownEnabled,
common::SubfieldFilters{}, nullptr, dataColumns);
} else {
common::SubfieldFilters subfieldFilters;
- auto names = colNameList;
- auto types = veloxTypeList;
- auto remainingFilter = exprConverter_->toVeloxExpr(readRel.filter(),
ROW(std::move(names), std::move(types)));
+ auto remainingFilter = exprConverter_->toVeloxExpr(readRel.filter(),
dataColumns);
tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
- kHiveConnectorId, "hive_table", filterPushdownEnabled,
std::move(subfieldFilters), remainingFilter);
+ kHiveConnectorId,
+ "hive_table",
+ filterPushdownEnabled,
+ std::move(subfieldFilters),
+ remainingFilter,
+ dataColumns);
}
// Get assignments and out names.
diff --git a/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc b/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
index cccc619a86..42c1cb79b8 100644
--- a/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
+++ b/cpp/velox/tests/Substrait2VeloxPlanConversionTest.cc
@@ -88,6 +88,14 @@ class Substrait2VeloxPlanConversionTest : public
exec::test::HiveConnectorTestBa
//
// Tested Velox operators: TableScan (Filter Pushdown), Project, Aggregate.
TEST_F(Substrait2VeloxPlanConversionTest, q6) {
+ FLAGS_velox_exception_user_stacktrace_enabled = true;
+ FLAGS_velox_exception_system_stacktrace_enabled = true;
+ std::unordered_map<std::string, std::string> hiveConfig{
+ {"hive.orc.use-column-names", "true"}, {"hive.parquet.use-column-names",
"true"}};
+ std::shared_ptr<const facebook::velox::config::ConfigBase> config{
+
std::make_shared<facebook::velox::config::ConfigBase>(std::move(hiveConfig))};
+ resetHiveConnector(config);
+
// Generate the used ORC file.
auto type =
ROW({"l_orderkey",
@@ -257,7 +265,7 @@ TEST_F(Substrait2VeloxPlanConversionTest, ifthenTest) {
// Convert to Velox PlanNode.
auto planNode = planConverter_->toVeloxPlan(substraitPlan,
std::vector<::substrait::ReadRel_LocalFiles>{split});
ASSERT_EQ(
- "-- Project[1][expressions: ] -> \n -- TableScan[0][table: hive_table,
remaining filter:
(and(and(and(and(isnotnull(\"hd_vehicle_count\"),or(equalto(\"hd_buy_potential\",\">10000\"),equalto(\"hd_buy_potential\",\"unknown\"))),greaterthan(\"hd_vehicle_count\",0)),if(greaterthan(\"hd_vehicle_count\",0),greaterthan(divide(cast
\"hd_dep_count\" as DOUBLE,cast \"hd_vehicle_count\" as
DOUBLE),1.2))),isnotnull(\"hd_demo_sk\")))] -> n0_0:BIGINT, n0_1:VARCHAR,
n0_2:BIGINT, n0_3:BIGINT\n",
+ "-- Project[1][expressions: ] -> \n -- TableScan[0][table: hive_table,
remaining filter:
(and(and(and(and(isnotnull(\"hd_vehicle_count\"),or(equalto(\"hd_buy_potential\",\">10000\"),equalto(\"hd_buy_potential\",\"unknown\"))),greaterthan(\"hd_vehicle_count\",0)),if(greaterthan(\"hd_vehicle_count\",0),greaterthan(divide(cast
\"hd_dep_count\" as DOUBLE,cast \"hd_vehicle_count\" as
DOUBLE),1.2))),isnotnull(\"hd_demo_sk\"))), data columns:
ROW<hd_demo_sk:BIGINT,hd_buy_potential:VARCHA [...]
planNode->toString(true, true));
}
@@ -273,7 +281,7 @@ TEST_F(Substrait2VeloxPlanConversionTest, filterUpper) {
// Convert to Velox PlanNode.
auto planNode = planConverter_->toVeloxPlan(substraitPlan,
std::vector<::substrait::ReadRel_LocalFiles>{split});
ASSERT_EQ(
- "-- Project[1][expressions: ] -> \n -- TableScan[0][table: hive_table,
remaining filter: (and(isnotnull(\"key\"),lessthan(\"key\",3)))] ->
n0_0:INTEGER\n",
+ "-- Project[1][expressions: ] -> \n -- TableScan[0][table: hive_table,
remaining filter: (and(isnotnull(\"key\"),lessthan(\"key\",3))), data columns:
ROW<key:INTEGER>] -> n0_0:INTEGER\n",
planNode->toString(true, true));
}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index b348f67193..c93aa6640d 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -47,5 +47,4 @@ class GlutenHiveSQLQuerySuite extends
GlutenHiveSQLQuerySuiteBase {
ignoreIfNotExists = true,
purge = false)
}
-
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]