This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f057958545 [GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not
used (#10843)
f057958545 is described below
commit f057958545480e3aa9be80c867a4d4455bbed33f
Author: Jin Chengcheng <[email protected]>
AuthorDate: Tue Oct 7 16:57:04 2025 -0400
[GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not used (#10843)
After the refactor, the ColumnHandle should be HiveColumnHandle.
The cudf table scan config is a static config that decides whether to register
the cudf connector. Deciding whether a given table scan node is a cudf scan reads
veloxCfg_, which is a session config, so this config must also be added to the
session config.
---
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 43 +++++++---------------
.../org/apache/gluten/config/GlutenConfig.scala | 3 +-
2 files changed, 15 insertions(+), 31 deletions(-)
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 6f32c5237e..ab76f2c56c 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -740,23 +740,7 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
// Spark's default compression code is snappy.
const auto& compressionKind =
writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY);
- std::shared_ptr<core::InsertTableHandle> tableHandle;
- if (useCudfTableHandle(splitInfos_) &&
veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
- veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
- #ifdef GLUTEN_ENABLE_GPU
- tableHandle = std::make_shared<core::InsertTableHandle>(
- kCudfHiveConnectorId,
- makeCudfHiveInsertTableHandle(
- tableColumnNames, /*inputType->names() clolumn name is different*/
- inputType->children(),
- std::make_shared<cudf_velox::connector::hive::LocationHandle>(
- writePath,
cudf_velox::connector::hive::LocationHandle::TableType::kNew, fileName),
- compressionKind,
- {},
- writerOptions));
-#endif
- } else {
- tableHandle = std::make_shared<core::InsertTableHandle>(
+ std::shared_ptr<core::InsertTableHandle> tableHandle =
std::make_shared<core::InsertTableHandle>(
kHiveConnectorId,
makeHiveInsertTableHandle(
tableColumnNames, /*inputType->names() clolumn name is different*/
@@ -767,7 +751,6 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
writerOptions,
fileFormat,
compressionKind));
- }
return std::make_shared<core::TableWriteNode>(
nextPlanNodeId(),
inputType,
@@ -1358,21 +1341,21 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
auto dataColumns = ROW(std::move(names), std::move(types));
connector::ConnectorTableHandlePtr tableHandle;
auto remainingFilter = readRel.has_filter() ?
exprConverter_->toVeloxExpr(readRel.filter(), dataColumns) : nullptr;
- if (useCudfTableHandle(splitInfos_)) {
+ auto connectorId = kHiveConnectorId;
+ if (useCudfTableHandle(splitInfos_) &&
veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
+ veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
#ifdef GLUTEN_ENABLE_GPU
- tableHandle = std::make_shared<CudfHiveTableHandle>(
- kCudfHiveConnectorId, "cudf_hive_table", filterPushdownEnabled,
nullptr, remainingFilter, dataColumns);
+ connectorId = kCudfHiveConnectorId;
#endif
- } else {
- common::SubfieldFilters subfieldFilters;
- tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
- kHiveConnectorId,
- "hive_table",
- filterPushdownEnabled,
- std::move(subfieldFilters),
- remainingFilter,
- dataColumns);
}
+ common::SubfieldFilters subfieldFilters;
+ tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
+ connectorId,
+ "hive_table",
+ filterPushdownEnabled,
+ std::move(subfieldFilters),
+ remainingFilter,
+ dataColumns);
// Get assignments and out names.
std::vector<std::string> outNames;
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index b8776853a6..5cdaaeda1a 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -483,7 +483,8 @@ object GlutenConfig extends ConfigRegistry {
"spark.gluten.sql.columnar.backend.velox.memoryUseHugePages",
"spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct",
"spark.gluten.sql.columnar.backend.velox.memoryPoolCapacityTransferAcrossTasks",
- "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes"
+ "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes",
+ "spark.gluten.sql.columnar.backend.velox.cudf.enableTableScan"
)
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]