This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1d4f2d271b [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)
1d4f2d271b is described below
commit 1d4f2d271bdda7b6e932fb21c5707715b451df9c
Author: Gluten Performance Bot <[email protected]>
AuthorDate: Wed Oct 8 11:51:16 2025 +0100
[GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08) (#10849)
* [GLUTEN-6887][VL] Daily Update Velox Version (2025_10_08)
Upstream Velox's New Commits:
81d943df0 by Vlad, feat: Add IConfig interface (14889)
d93affa03 by Timothy Meehan, build: Add 'abseil' as a build dependency for re2 (14849)
21da67163 by Simon Eves, fix(build): Bump Hadoop version for more reliable download (14928)
211380331 by dependabot[bot], build(ci): Bump actions/cache from 4.2.4 to 4.3.0 (14993)
f266c8486 by Deepak Majeti, docs: Fix iceberg functions formatting and some warnings (14911)
80be1866a by Masha Basmanova, feat: Add Variant::toJson(Type) API that doesn't require shared_ptr (15070)
5c1dac468 by Xiaoxuan Meng, fix: Fix the test task cancellation condition (15065)
aa406cbd6 by Deepak Majeti, feat(cudf): Remove gflags in favor of CudfConfig (14963)
78159fa46 by Xiaoxuan Meng, refactor: Remove legacy task spill code after update prestissimo (15053)
a31539e70 by Pedro Eugenio Rocha Pedreira, docs: Add new component and maintainer for LibcuDF bindings (15057)
5a31ae324 by lingbin, fix(ssdcache): Write rate calculation (15028)
207c1b679 by Xiaoxuan Meng, fix: Fix sxtream failed test by supporting task cancellation in test (15052)
Signed-off-by: glutenperfbot <[email protected]>
---------
Signed-off-by: glutenperfbot <[email protected]>
Co-authored-by: glutenperfbot <[email protected]>
Co-authored-by: Rong Ma <[email protected]>
---
cpp/core/config/GlutenConfig.h | 4 +-
cpp/velox/compute/VeloxBackend.cc | 16 ++++--
cpp/velox/compute/WholeStageResultIterator.cc | 3 +-
cpp/velox/config/VeloxConfig.h | 2 +-
cpp/velox/jni/VeloxJniWrapper.cc | 21 ++------
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 74 ++++++++++++---------------
ep/build-velox/src/get_velox.sh | 4 +-
7 files changed, 56 insertions(+), 68 deletions(-)
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 2f5992b9c0..110c741a4b 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -95,9 +95,9 @@ const std::string kSparkJsonIgnoreNullFields =
"spark.sql.jsonGenerator.ignoreNu
// cudf
const std::string kCudfEnabled = "spark.gluten.sql.columnar.cudf";
-const bool kCudfEnabledDefault = "true";
+constexpr bool kCudfEnabledDefault = true;
const std::string kDebugCudf = "spark.gluten.sql.debug.cudf";
-const bool kDebugCudfDefault = "false";
+const std::string kDebugCudfDefault = "false";
std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t
planDataLength);
diff --git a/cpp/velox/compute/VeloxBackend.cc
b/cpp/velox/compute/VeloxBackend.cc
index 54cb08bf57..fedb5aa171 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -28,6 +28,7 @@
#include "utils/qat/QatCodec.h"
#endif
#ifdef GLUTEN_ENABLE_GPU
+#include "velox/experimental/cudf/CudfConfig.h"
#include "velox/experimental/cudf/connectors/hive/CudfHiveConnector.h"
#include "velox/experimental/cudf/exec/ToCudf.h"
#endif
@@ -166,11 +167,16 @@ void VeloxBackend::init(
#ifdef GLUTEN_ENABLE_GPU
if (backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {
- FLAGS_velox_cudf_debug = backendConf_->get<bool>(kDebugCudf,
kDebugCudfDefault);
- FLAGS_velox_cudf_memory_resource =
backendConf_->get<std::string>(kCudfMemoryResource, kCudfMemoryResourceDefault);
- auto& options = velox::cudf_velox::CudfOptions::getInstance();
- options.memoryPercent = backendConf_->get<int32_t>(kCudfMemoryPercent,
kCudfMemoryPercentDefault);
- velox::cudf_velox::registerCudf(options);
+ std::unordered_map<std::string, std::string> options = {
+ {velox::cudf_velox::CudfConfig::kCudfEnabled, "true"},
+ {velox::cudf_velox::CudfConfig::kCudfDebugEnabled,
backendConf_->get(kDebugCudf, kDebugCudfDefault)},
+ {velox::cudf_velox::CudfConfig::kCudfMemoryResource,
+ backendConf_->get(kCudfMemoryResource, kCudfMemoryResourceDefault)},
+ {velox::cudf_velox::CudfConfig::kCudfMemoryPercent,
+ backendConf_->get(kCudfMemoryPercent, kCudfMemoryPercentDefault)}};
+ auto& cudfConfig = velox::cudf_velox::CudfConfig::getInstance();
+ cudfConfig.initialize(std::move(options));
+ velox::cudf_velox::registerCudf();
}
#endif
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc
b/cpp/velox/compute/WholeStageResultIterator.cc
index fb3e1550a8..7846898cb7 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -24,6 +24,7 @@
#ifdef GLUTEN_ENABLE_GPU
#include <cudf/io/types.hpp>
#include <mutex>
+#include "velox/experimental/cudf/CudfConfig.h"
#include "velox/experimental/cudf/connectors/hive/CudfHiveConnectorSplit.h"
#include "velox/experimental/cudf/exec/ToCudf.h"
#endif
@@ -661,7 +662,7 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<bool>(kSparkJsonIgnoreNullFields, true));
#ifdef GLUTEN_ENABLE_GPU
- configs[cudf_velox::kCudfEnabled] =
std::to_string(veloxCfg_->get<bool>(kCudfEnabled, false));
+ configs[velox::cudf_velox::CudfConfig::kCudfEnabled] =
std::to_string(veloxCfg_->get<bool>(kCudfEnabled, false));
#endif
const auto setIfExists = [&](const std::string& glutenKey, const
std::string& veloxKey) {
diff --git a/cpp/velox/config/VeloxConfig.h b/cpp/velox/config/VeloxConfig.h
index 690fbd59f1..4406887978 100644
--- a/cpp/velox/config/VeloxConfig.h
+++ b/cpp/velox/config/VeloxConfig.h
@@ -179,7 +179,7 @@ const std::string kCudfMemoryResourceDefault =
// Initial percent of GPU memory to allocate for memory resource for one thread
const std::string kCudfMemoryPercent =
"spark.gluten.sql.columnar.backend.velox.cudf.memoryPercent";
-const int32_t kCudfMemoryPercentDefault = 50;
+const std::string kCudfMemoryPercentDefault = "50";
/// Preferred size of batches in bytes to be returned by operators.
const std::string kVeloxPreferredBatchBytes =
"spark.gluten.sql.columnar.backend.velox.preferredBatchBytes";
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index 9e4f37687a..0052880143 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -243,30 +243,19 @@ JNIEXPORT jlong JNICALL
Java_org_apache_gluten_columnarbatch_VeloxColumnarBatchJ
auto repeatedBatch =
ObjectStore::retrieve<ColumnarBatch>(repeatedBatchHandle);
auto nonRepeatedBatch =
ObjectStore::retrieve<ColumnarBatch>(nonRepeatedBatchHandle);
- GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(),
- "Row numbers after repeated do not match the expected size");
+ GLUTEN_CHECK(rowNums == nonRepeatedBatch->numRows(), "Row numbers after
repeated do not match the expected size");
// wrap repeatedBatch's rowVector in dictionary vector.
auto vb = std::dynamic_pointer_cast<VeloxColumnarBatch>(repeatedBatch);
auto rowVector = vb->getRowVector();
std::vector<VectorPtr> outputs(rowVector->childrenSize());
for (int i = 0; i < outputs.size(); i++) {
- outputs[i] = BaseVector::wrapInDictionary(
- nullptr /*nulls*/,
- repeatedIndices,
- rowNums,
- rowVector->childAt(i));
+ outputs[i] = BaseVector::wrapInDictionary(nullptr /*nulls*/,
repeatedIndices, rowNums, rowVector->childAt(i));
}
- auto newRowVector = std::make_shared<RowVector>(
- veloxPool.get(),
- rowVector->type(),
- BufferPtr(nullptr),
- rowNums,
- std::move(outputs));
+ auto newRowVector =
+ std::make_shared<RowVector>(veloxPool.get(), rowVector->type(),
BufferPtr(nullptr), rowNums, std::move(outputs));
repeatedBatch =
std::make_shared<VeloxColumnarBatch>(std::move(newRowVector));
- auto newBatch = VeloxColumnarBatch::compose(
- veloxPool.get(),
- {std::move(repeatedBatch), std::move(nonRepeatedBatch)});
+ auto newBatch = VeloxColumnarBatch::compose(veloxPool.get(),
{std::move(repeatedBatch), std::move(nonRepeatedBatch)});
return ctx->saveObject(newBatch);
JNI_METHOD_END(kInvalidObjectHandle)
}
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index ab76f2c56c..ba7a707568 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -168,15 +168,15 @@ bool SplitInfo::canUseCudfConnector() {
bool isEmpty = partitionColumns.empty();
if (!isEmpty) {
- // Check if all maps are empty
- bool allMapsEmpty = true;
- for (const auto& m : partitionColumns) {
- if (!m.empty()) {
- allMapsEmpty = false;
- break;
- }
+ // Check if all maps are empty
+ bool allMapsEmpty = true;
+ for (const auto& m : partitionColumns) {
+ if (!m.empty()) {
+ allMapsEmpty = false;
+ break;
}
- isEmpty = allMapsEmpty;
+ }
+ isEmpty = allMapsEmpty;
}
return isEmpty && format == dwio::common::FileFormat::PARQUET;
}
@@ -596,19 +596,17 @@ std::shared_ptr<connector::hive::HiveInsertTableHandle>
makeHiveInsertTableHandl
}
if (std::find(partitionedBy.cbegin(), partitionedBy.cend(),
tableColumnNames.at(i)) != partitionedBy.cend()) {
++numPartitionColumns;
- columnHandles.emplace_back(
- std::make_shared<connector::hive::HiveColumnHandle>(
- tableColumnNames.at(i),
- connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
- tableColumnTypes.at(i),
- tableColumnTypes.at(i)));
+
columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
+ tableColumnNames.at(i),
+ connector::hive::HiveColumnHandle::ColumnType::kPartitionKey,
+ tableColumnTypes.at(i),
+ tableColumnTypes.at(i)));
} else {
- columnHandles.emplace_back(
- std::make_shared<connector::hive::HiveColumnHandle>(
- tableColumnNames.at(i),
- connector::hive::HiveColumnHandle::ColumnType::kRegular,
- tableColumnTypes.at(i),
- tableColumnTypes.at(i)));
+
columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>(
+ tableColumnNames.at(i),
+ connector::hive::HiveColumnHandle::ColumnType::kRegular,
+ tableColumnTypes.at(i),
+ tableColumnTypes.at(i)));
}
}
VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size());
@@ -635,11 +633,10 @@ std::shared_ptr<CudfHiveInsertTableHandle>
makeCudfHiveInsertTableHandle(
std::vector<std::shared_ptr<const CudfHiveColumnHandle>> columnHandles;
for (int i = 0; i < tableColumnNames.size(); ++i) {
- columnHandles.push_back(
- std::make_shared<CudfHiveColumnHandle>(
- tableColumnNames.at(i),
- tableColumnTypes.at(i),
-
cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))}));
+ columnHandles.push_back(std::make_shared<CudfHiveColumnHandle>(
+ tableColumnNames.at(i),
+ tableColumnTypes.at(i),
+
cudf::data_type{cudf_velox::veloxToCudfTypeId(tableColumnTypes.at(i))}));
}
return std::make_shared<CudfHiveInsertTableHandle>(
@@ -741,16 +738,16 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
const auto& compressionKind =
writerOptions->compressionKind.value_or(common::CompressionKind::CompressionKind_SNAPPY);
std::shared_ptr<core::InsertTableHandle> tableHandle =
std::make_shared<core::InsertTableHandle>(
- kHiveConnectorId,
- makeHiveInsertTableHandle(
- tableColumnNames, /*inputType->names() clolumn name is different*/
- inputType->children(),
- partitionedKey,
- bucketProperty,
- makeLocationHandle(writePath, fileName, fileFormat,
compressionKind, bucketProperty != nullptr),
- writerOptions,
- fileFormat,
- compressionKind));
+ kHiveConnectorId,
+ makeHiveInsertTableHandle(
+ tableColumnNames, /*inputType->names() clolumn name is different*/
+ inputType->children(),
+ partitionedKey,
+ bucketProperty,
+ makeLocationHandle(writePath, fileName, fileFormat, compressionKind,
bucketProperty != nullptr),
+ writerOptions,
+ fileFormat,
+ compressionKind));
return std::make_shared<core::TableWriteNode>(
nextPlanNodeId(),
inputType,
@@ -1350,12 +1347,7 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
}
common::SubfieldFilters subfieldFilters;
tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
- connectorId,
- "hive_table",
- filterPushdownEnabled,
- std::move(subfieldFilters),
- remainingFilter,
- dataColumns);
+ connectorId, "hive_table", filterPushdownEnabled,
std::move(subfieldFilters), remainingFilter, dataColumns);
// Get assignments and out names.
std::vector<std::string> outNames;
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 019c38fd76..169744af9b 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -18,11 +18,11 @@ set -exu
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2025_10_06
+VELOX_BRANCH=2025_10_08
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
VELOX_ENHANCED_REPO=https://github.com/IBM/velox.git
-VELOX_ENHANCED_BRANCH=ibm-2025_10_06
+VELOX_ENHANCED_BRANCH=ibm-2025_10_08
ENABLE_ENHANCED_FEATURES=OFF
# Developer use only for testing Velox PR.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]