(incubator-gluten) branch main updated: [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)

marong Sat, 18 Oct 2025 10:41:49 -0700

This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 1d4f2d271b [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)
1d4f2d271b is described below

commit 1d4f2d271bdda7b6e932fb21c5707715b451df9c
Author: Gluten Performance Bot <[email protected]>
AuthorDate: Wed Oct 8 11:51:16 2025 +0100

    [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)
    
    * [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08)
    
    Upstream Velox's New Commits:
    81d943df0 by Vlad, feat: Add IConfig interface (14889)
    d93affa03 by Timothy Meehan, build: Add 'abseil' as a build dependency for re2 (14849)
    21da67163 by Simon Eves, fix(build): Bump Hadoop version for more reliable download (14928)
    211380331 by dependabot[bot], build(ci): Bump actions/cache from 4.2.4 to 4.3.0 (14993)
    f266c8486 by Deepak Majeti, docs: Fix iceberg functions formatting and some warnings (14911)
    80be1866a by Masha Basmanova, feat: Add Variant::toJson(Type) API that doesn't require shared_ptr (15070)
    5c1dac468 by Xiaoxuan Meng, fix: Fix the test task cancellation condition (15065)
    aa406cbd6 by Deepak Majeti, feat(cudf): Remove gflags in favor of CudfConfig (14963)
    78159fa46 by Xiaoxuan Meng, refactor: Remove legacy task spill code after update prestissimo (15053)
    a31539e70 by Pedro Eugenio Rocha Pedreira, docs: Add new component and maintainer for LibcuDF bindings (15057)
    5a31ae324 by lingbin, fix(ssdcache): Write rate calculation (15028)
    207c1b679 by Xiaoxuan Meng, fix: Fix sxtream failed test by supporting task cancellation in test (15052)
    
    Signed-off-by: glutenperfbot <[email protected]>
    
    ---------
    
    Signed-off-by: glutenperfbot <[email protected]>
    Co-authored-by: glutenperfbot <[email protected]>
    Co-authored-by: Rong Ma <[email protected]>
---
 cpp/core/config/GlutenConfig.h                |  4 +-
 cpp/velox/compute/VeloxBackend.cc             | 16 ++++--
 cpp/velox/compute/WholeStageResultIterator.cc |  3 +-
 cpp/velox/config/VeloxConfig.h                |  2 +-
 cpp/velox/jni/VeloxJniWrapper.cc              | 21 ++------
 cpp/velox/substrait/SubstraitToVeloxPlan.cc   | 74 ++++++++++++---------------
 ep/build-velox/src/get_velox.sh               |  4 +-
 7 files changed, 56 insertions(+), 68 deletions(-)

diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 2f5992b9c0..110c741a4b 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -95,9 +95,9 @@ const std::string kSparkJsonIgnoreNullFields = "spark.sql.jsonGenerator.ignoreNu
 
 // cudf
 const std::string kCudfEnabled = "spark.gluten.sql.columnar.cudf";
-const bool kCudfEnabledDefault = "true";
+constexpr bool kCudfEnabledDefault = true;
 const std::string kDebugCudf = "spark.gluten.sql.debug.cudf";
-const bool kDebugCudfDefault = "false";
+const std::string kDebugCudfDefault = "false";
 
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);
diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc
index 54cb08bf57..fedb5aa171 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -28,6 +28,7 @@
 #include "utils/qat/QatCodec.h"
 #endif
 #ifdef GLUTEN_ENABLE_GPU
+#include "velox/experimental/cudf/CudfConfig.h"
 #include "velox/experimental/cudf/connectors/hive/CudfHiveConnector.h"
 #include "velox/experimental/cudf/exec/ToCudf.h"
 #endif
@@ -166,11 +167,16 @@ void VeloxBackend::init(
 
 #ifdef GLUTEN_ENABLE_GPU
   if (backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
-    FLAGS_velox_cudf_debug = backendConf_->get<bool>(kDebugCudf, kDebugCudfDefault);
-    FLAGS_velox_cudf_memory_resource = backendConf_->get<std::string>(kCudfMemoryResource, kCudfMemoryResourceDefault);
-    auto& options = velox::cudf_velox::CudfOptions::getInstance();
-    options.memoryPercent = backendConf_->get<int32_t>(kCudfMemoryPercent, kCudfMemoryPercentDefault);
-    velox::cudf_velox::registerCudf(options);
+    std::unordered_map<std::string, std::string> options = {
+        {velox::cudf_velox::CudfConfig::kCudfEnabled, "true"},
+        {velox::cudf_velox::CudfConfig::kCudfDebugEnabled, backendConf_->get(kDebugCudf, kDebugCudfDefault)},
+        {velox::cudf_velox::CudfConfig::kCudfMemoryResource,
+         backendConf_->get(kCudfMemoryResource, kCudfMemoryResourceDefault)},
+        {velox::cudf_velox::CudfConfig::kCudfMemoryPercent,
+         backendConf_->get(kCudfMemoryPercent, kCudfMemoryPercentDefault)}};
+    auto& cudfConfig = velox::cudf_velox::CudfConfig::getInstance();
+    cudfConfig.initialize(std::move(options));
+    velox::cudf_velox::registerCudf();
   }
 #endif
 
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index fb3e1550a8..7846898cb7 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -24,6 +24,7 @@
 #ifdef GLUTEN_ENABLE_GPU
 #include <cudf/io/types.hpp>
 #include <mutex>
+#include "velox/experimental/cudf/CudfConfig.h"
 #include "velox/experimental/cudf/connectors/hive/CudfHiveConnectorSplit.h"
 #include "velox/experimental/cudf/exec/ToCudf.h"
 #endif
@@ -661,7 +662,7 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
         std::to_string(veloxCfg_->get<bool>(kSparkJsonIgnoreNullFields, true));
 
 #ifdef GLUTEN_ENABLE_GPU
-    configs[cudf_velox::kCudfEnabled] = std::to_string(veloxCfg_->get<bool>(kCudfEnabled, false));
+    configs[velox::cudf_velox::CudfConfig::kCudfEnabled] = std::to_string(veloxCfg_->get<bool>(kCudfEnabled, false));
 #endif
 
     const auto setIfExists = [&](const std::string& glutenKey, const std::string& veloxKey) {
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
index 690fbd59f1..4406887978 100644
--- a/cpp/velox/config/VeloxConfig.h
+++ b/cpp/velox/config/VeloxConfig.h
@@ -179,7 +179,7 @@ const std::string kCudfMemoryResourceDefault =
 
 // Initial percent of GPU memory to allocate for memory resource for one thread
 const std::string kCudfMemoryPercent = "spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent";
-const int32_t kCudfMemoryPercentDefault = 50;
+const std::string kCudfMemoryPercentDefault = "50";
 
 /// Preferred size of batches in bytes to be returned by operators.
 const std::string kVeloxPreferredBatchBytes = "spark.gluten.sql.columnar.backend.velox.preferredBatchBytes";
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index 9e4f37687a..0052880143 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -243,30 +243,19 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_columnarbatch_VeloxColumnarBatchJ
 
   auto repeatedBatch = ObjectStore::retrieve<ColumnarBatch>(repeatedBatchHandle);
   auto nonRepeatedBatch = ObjectStore::retrieve<ColumnarBatch>(nonRepeatedBatchHandle);
-  GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(),
-      "Row numbers after repeated do not match the expected size");
+  GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(), "Row numbers after repeated do not match the expected size");
 
   // wrap repeatedBatch's rowVector in dictionary vector.
   auto vb = std::dynamic_pointer_cast<VeloxColumnarBatch>(repeatedBatch);
   auto rowVector = vb->getRowVector();
   std::vector<VectorPtr> outputs(rowVector->childrenSize());
   for (int i = 0; i < outputs.size(); i++) {
-    outputs[i] = BaseVector::wrapInDictionary(
-        nullptr /*nulls*/,
-        repeatedIndices,
-        rowNums,
-        rowVector->childAt(i));
+    outputs[i] = BaseVector::wrapInDictionary(nullptr /*nulls*/, repeatedIndices, rowNums, rowVector->childAt(i));
   }
-  auto newRowVector = std::make_shared<RowVector>(
-      veloxPool.get(),
-      rowVector->type(),
-      BufferPtr(nullptr),
-      rowNums,
-      std::move(outputs));
+  auto newRowVector =
+      std::make_shared<RowVector>(veloxPool.get(), rowVector->type(), BufferPtr(nullptr), rowNums, std::move(outputs));
   repeatedBatch = std::make_shared<VeloxColumnarBatch>(std::move(newRowVector));
-  auto newBatch = VeloxColumnarBatch::compose(
-      veloxPool.get(),
-      {std::move(repeatedBatch), std::move(nonRepeatedBatch)});
+  auto newBatch = VeloxColumnarBatch::compose(veloxPool.get(), {std::move(repeatedBatch), std::move(nonRepeatedBatch)});
   return ctx->saveObject(newBatch);
   JNI_METHOD_END(kInvalidObjectHandle)
 }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index ab76f2c56c..ba7a707568 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -168,15 +168,15 @@ bool SplitInfo::canUseCudfConnector() {
   bool isEmpty = partitionColumns.empty();
 
   if (!isEmpty) {
-      // Check if all maps are empty
-      bool allMapsEmpty = true;
-      for (const auto& m : partitionColumns) {
-          if (!m.empty()) {
-              allMapsEmpty = false;
-              break;
-          }
+    // Check if all maps are empty
+    bool allMapsEmpty = true;
+    for (const auto& m : partitionColumns) {
+      if (!m.empty()) {
+        allMapsEmpty = false;
+        break;
       }
-      isEmpty = allMapsEmpty;
+    }
+    isEmpty = allMapsEmpty;
   }
   return isEmpty && format == dwio::common::FileFormat::PARQUET;
 }
@@ -596,19 +596,17 @@ std::shared_ptr<connector::hive::HiveInsertTableHandle> makeHiveInsertTableHandl
     }
     if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) {
       ++numPartitionColumns;
-      columnHandles.emplace_back(
-          std::make_shared<connector::hive::HiveColumnHandle>(
-              tableColumnNames.at(i),
-              connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
-              tableColumnTypes.at(i),
-              tableColumnTypes.at(i)));
+      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
+          tableColumnNames.at(i),
+          connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
+          tableColumnTypes.at(i),
+          tableColumnTypes.at(i)));
     } else {
-      columnHandles.emplace_back(
-          std::make_shared<connector::hive::HiveColumnHandle>(
-              tableColumnNames.at(i),
-              connector::hive::HiveColumnHandle::ColumnType::kRegular,
-              tableColumnTypes.at(i),
-              tableColumnTypes.at(i)));
+      columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
+          tableColumnNames.at(i),
+          connector::hive::HiveColumnHandle::ColumnType::kRegular,
+          tableColumnTypes.at(i),
+          tableColumnTypes.at(i)));
     }
   }
   VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size());
@@ -635,11 +633,10 @@ std::shared_ptr<CudfHiveInsertTableHandle> makeCudfHiveInsertTableHandle(
   std::vector<std::shared_ptr<const CudfHiveColumnHandle>> columnHandles;
 
   for (int i = 0; i < tableColumnNames.size(); ++i) {
-    columnHandles.push_back(
-        std::make_shared<CudfHiveColumnHandle>(
-            tableColumnNames.at(i),
-            tableColumnTypes.at(i),
-            cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))}));
+    columnHandles.push_back(std::make_shared<CudfHiveColumnHandle>(
+        tableColumnNames.at(i),
+        tableColumnTypes.at(i),
+        cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))}));
   }
 
   return std::make_shared<CudfHiveInsertTableHandle>(
@@ -741,16 +738,16 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   const auto& compressionKind =
       writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY);
   std::shared_ptr<core::InsertTableHandle> tableHandle = std::make_shared<core::InsertTableHandle>(
-        kHiveConnectorId,
-        makeHiveInsertTableHandle(
-            tableColumnNames, /*inputType->names() clolumn name is different*/
-            inputType->children(),
-            partitionedKey,
-            bucketProperty,
-            makeLocationHandle(writePath, fileName, fileFormat, compressionKind, bucketProperty != nullptr),
-            writerOptions,
-            fileFormat,
-            compressionKind));
+      kHiveConnectorId,
+      makeHiveInsertTableHandle(
+          tableColumnNames, /*inputType->names() clolumn name is different*/
+          inputType->children(),
+          partitionedKey,
+          bucketProperty,
+          makeLocationHandle(writePath, fileName, fileFormat, compressionKind, bucketProperty != nullptr),
+          writerOptions,
+          fileFormat,
+          compressionKind));
   return std::make_shared<core::TableWriteNode>(
       nextPlanNodeId(),
       inputType,
@@ -1350,12 +1347,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
   }
   common::SubfieldFilters subfieldFilters;
   tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
-      connectorId,
-      "hive_table",
-      filterPushdownEnabled,
-      std::move(subfieldFilters),
-      remainingFilter,
-      dataColumns);
+      connectorId, "hive_table", filterPushdownEnabled, std::move(subfieldFilters), remainingFilter, dataColumns);
 
   // Get assignments and out names.
   std::vector<std::string> outNames;
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 019c38fd76..169744af9b 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -18,11 +18,11 @@ set -exu
 
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_10_06
+VELOX_BRANCH=2025_10_08
 VELOX_HOME=""
 RUN_SETUP_SCRIPT=ON
 VELOX_ENHANCED_REPO=https://github.com/IBM/velox.git
-VELOX_ENHANCED_BRANCH=ibm-2025_10_06
+VELOX_ENHANCED_BRANCH=ibm-2025_10_08
 ENABLE_ENHANCED_FEATURES=OFF
 
 # Developer use only for testing Velox PR.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)

Reply via email to