(incubator-gluten) branch main updated: [GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not used (#10843)

yuanzhou Sat, 18 Oct 2025 13:06:26 -0700

This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new f057958545 [GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not 
used (#10843)
f057958545 is described below

commit f057958545480e3aa9be80c867a4d4455bbed33f
Author: Jin Chengcheng <[email protected]>
AuthorDate: Tue Oct 7 16:57:04 2025 -0400

    [GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not used (#10843)
    
    After the refactor, the ColumnHandle should be HiveColumnHandle.
    The cudf table scan config is a static config that decides whether to
    register the cudf connector. Whether a given table scan node is a cudf
    scan is decided from veloxCfg_, which is a session config, so this config
    must also be added to the session config.
---
 cpp/velox/substrait/SubstraitToVeloxPlan.cc        | 43 +++++++---------------
 .../org/apache/gluten/config/GlutenConfig.scala    |  3 +-
 2 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 6f32c5237e..ab76f2c56c 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -740,23 +740,7 @@ core::PlanNodePtr 
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   // Spark's default compression code is snappy.
   const auto& compressionKind =
       
writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY);
-  std::shared_ptr<core::InsertTableHandle> tableHandle;
-  if (useCudfTableHandle(splitInfos_) && 
veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
-      veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
-  #ifdef GLUTEN_ENABLE_GPU
-    tableHandle = std::make_shared<core::InsertTableHandle>(
-        kCudfHiveConnectorId,
-        makeCudfHiveInsertTableHandle(
-            tableColumnNames, /*inputType->names() clolumn name is different*/
-            inputType->children(),
-            std::make_shared<cudf_velox::connector::hive::LocationHandle>(
-                writePath, 
cudf_velox::connector::hive::LocationHandle::TableType::kNew, fileName),
-            compressionKind,
-            {},
-            writerOptions));
-#endif
-  } else {
-    tableHandle = std::make_shared<core::InsertTableHandle>(
+  std::shared_ptr<core::InsertTableHandle> tableHandle = 
std::make_shared<core::InsertTableHandle>(
         kHiveConnectorId,
         makeHiveInsertTableHandle(
             tableColumnNames, /*inputType->names() clolumn name is different*/
@@ -767,7 +751,6 @@ core::PlanNodePtr 
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
             writerOptions,
             fileFormat,
             compressionKind));
-  }
   return std::make_shared<core::TableWriteNode>(
       nextPlanNodeId(),
       inputType,
@@ -1358,21 +1341,21 @@ core::PlanNodePtr 
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   auto dataColumns = ROW(std::move(names), std::move(types));
   connector::ConnectorTableHandlePtr tableHandle;
   auto remainingFilter = readRel.has_filter() ? 
exprConverter_->toVeloxExpr(readRel.filter(), dataColumns) : nullptr;
-  if (useCudfTableHandle(splitInfos_)) {
+  auto connectorId = kHiveConnectorId;
+  if (useCudfTableHandle(splitInfos_) && 
veloxCfg_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
+      veloxCfg_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
 #ifdef GLUTEN_ENABLE_GPU
-    tableHandle = std::make_shared<CudfHiveTableHandle>(
-        kCudfHiveConnectorId, "cudf_hive_table", filterPushdownEnabled, 
nullptr, remainingFilter, dataColumns);
+    connectorId = kCudfHiveConnectorId;
 #endif
-  } else {
-    common::SubfieldFilters subfieldFilters;
-    tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
-        kHiveConnectorId,
-        "hive_table",
-        filterPushdownEnabled,
-        std::move(subfieldFilters),
-        remainingFilter,
-        dataColumns);
   }
+  common::SubfieldFilters subfieldFilters;
+  tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
+      connectorId,
+      "hive_table",
+      filterPushdownEnabled,
+      std::move(subfieldFilters),
+      remainingFilter,
+      dataColumns);
 
   // Get assignments and out names.
   std::vector<std::string> outNames;
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala 
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index b8776853a6..5cdaaeda1a 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -483,7 +483,8 @@ object GlutenConfig extends ConfigRegistry {
     "spark.gluten.sql.columnar.backend.velox.memoryUseHugePages",
     "spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct",
     
"spark.gluten.sql.columnar.backend.velox.memoryPoolCapacityTransferAcrossTasks",
-    "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes"
+    "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes",
+    "spark.gluten.sql.columnar.backend.velox.cudf.enableTableScan"
   )
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [GLUTEN-10621] Fix GPU connector CudfHiveTableHandle is not used (#10843)

Reply via email to