This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6ffab3a3fe [GLUTEN-6887][VL] Daily Update Velox Version (2024_12_04)
(#8137)
6ffab3a3fe is described below
commit 6ffab3a3fe903bd26b06ca98992edee97c7ffdf7
Author: Gluten Performance Bot
<[email protected]>
AuthorDate: Wed Dec 4 20:08:32 2024 +0800
[GLUTEN-6887][VL] Daily Update Velox Version (2024_12_04) (#8137)
Upstream Velox's New Commits:
2c57445fd by zuyu, refactor: Use const ref in Filter (11725)
f209751d2 by Wei He, refactor: Extract the definition of
Vector::Options::TimestampPrecision to Utils.h (11722)
86d6f3389 by aditi-pandit, refactor: Change C style casts to C++ style
(Part 2) (11684)
f89b68d27 by Kevin Wilfong, fix: Throw on negative numbers in url_decode
(11734)
1cab68067 by Kostas Xirogiannopoulos, feat(planbuilder): Enable passing
compressionKind via tableWrite (11724)
a94e87065 by Sergey Pershin, fix: Parsing of fractions of a second in
parse_datetime() (11723)
a0bbea2f3 by zhli1142015, feat(ABFS): Support SAS and OAuth config (11623)
4dd6499f0 by Minhan Cao, build: Removed GTest::gtest_main from
CMakeLists.txt for velox_simple_aggregate_test (11668)
fc5aa37fe by Jialiang Tan, fix: Fix HashJoinTest.buildReclaimedMemoryReport
(11721)
2dae23fcb by yingsu00, misc: Add isBlockedTiming to PlanNodeStats (11361)
0bb7e64c4 by Yang Zhang, refactor(sparksql): Speed up sparksql compilation
by splitting function registrations (11565)
46fd360d2 by rui-mo, fix: Use input directly in createLongDecimalVector if
aligned (11648)
db0c3e697 by Chengcheng Jin, fix: Fix Stream not close because it holds in
TreeOfLoser (11660)
c542ebc21 by mohsaka, refactor: Clean up, match presto, and optimize
ipprefix string -> ipprefix (11670)
0a685b123 by Pavel Solodovnikov, fix: Some minor fixes for `RawVector`
(11693)
2872a16c9 by Jacob Wujciak-Jens, build(ci): Enforce `build(ci)` prefix on
dependabot PRs (11714)
9bbe4cbff by Raymond Wu, feat(column selector): allow row type (11700)
debdf42e6 by Pavel Solodovnikov, build: Make benchmarks respect
`VELOX_ENABLE_BENCHMARKS` cmake option (11692)
d838d0744 by Yenda Li, fix: MergeExchange hangs at shutdown after
abort/cancel (11718)
0dccaeabb by Masha Basmanova, refactor: Reduce dependencies in
LocalRunnerTestBase.h (11717)
3e20a7afd by Masha Basmanova, feat: Allow to control level of detail in
MultiFragmentPlan::toString (11715)
331ca86a6 by Masha Basmanova, refactor: Move MultiFragmentPlan::toString
into its own .cpp file (11713)
557b23827 by Masha Basmanova, refactor: Remove Task dependency from
MultiFragmentPlan.h (11712)
a4d093454 by duanmeng, feat: Print operator stats in query replayer (11699)
a969af8c5 by Wei He, fix: Fix flaky velox_local_runner_test due to memory
pool name collision (11683)
6d08f3b5a by Pedro Eugenio Rocha Pedreira, fix(merge-join): Produce output
before advancing key comparison (11605)
6ff029eea by Joe Giardino, Velox: fix complex vector memory tracking (11630)
d9aaa6afd by Xiaoxuan Meng, misc: Remove legacy code in task and driver
(11696)
---
cpp/velox/CMakeLists.txt | 2 ++
.../operators/functions/RegistrationAllFunctions.cc | 2 +-
cpp/velox/operators/plannodes/RowVectorStream.h | 21 +++++++++++++++++++--
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 7 -------
cpp/velox/tests/VeloxSubstraitRoundTripTest.cc | 1 -
dev/vcpkg/vcpkg.json | 3 ++-
ep/build-velox/src/get_velox.sh | 2 +-
7 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index 4503d5947d..17b824b619 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -132,6 +132,7 @@ macro(find_azure)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
find_package(azure-storage-blobs-cpp CONFIG REQUIRED)
find_package(azure-storage-files-datalake-cpp CONFIG REQUIRED)
+ find_package(azure-identity-cpp CONFIG REQUIRED)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_BCK})
endmacro()
@@ -337,6 +338,7 @@ if(ENABLE_ABFS)
find_azure()
target_link_libraries(velox PUBLIC Azure::azure-storage-blobs)
target_link_libraries(velox PUBLIC Azure::azure-storage-files-datalake)
+ target_link_libraries(velox PUBLIC Azure::azure-identity)
endif()
if(BUILD_EXAMPLES)
diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc b/cpp/velox/operators/functions/RegistrationAllFunctions.cc
index ea9d3f4202..06147d6d43 100644
--- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc
+++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc
@@ -29,8 +29,8 @@
#include "velox/functions/sparksql/DecimalArithmetic.h"
#include "velox/functions/sparksql/Hash.h"
#include "velox/functions/sparksql/Rand.h"
-#include "velox/functions/sparksql/Register.h"
#include "velox/functions/sparksql/aggregates/Register.h"
+#include "velox/functions/sparksql/registration/Register.h"
#include "velox/functions/sparksql/window/WindowFunctionsRegistration.h"
using namespace facebook;
diff --git a/cpp/velox/operators/plannodes/RowVectorStream.h b/cpp/velox/operators/plannodes/RowVectorStream.h
index e5a469afee..fcf2ffd15f 100644
--- a/cpp/velox/operators/plannodes/RowVectorStream.h
+++ b/cpp/velox/operators/plannodes/RowVectorStream.h
@@ -21,6 +21,7 @@
#include "memory/VeloxColumnarBatch.h"
#include "velox/exec/Driver.h"
#include "velox/exec/Operator.h"
+#include "velox/exec/Task.h"
namespace gluten {
class RowVectorStream {
@@ -46,8 +47,16 @@ class RowVectorStream {
// As of now, non-zero running threads usually happens when:
// 1. Task A spills task B;
// 2. Task A trys to grow buffers created by task B, during which spill is requested on task A again.
- facebook::velox::exec::SuspendedSection ss(driverCtx_->driver);
+ // facebook::velox::exec::SuspendedSection ss(driverCtx_->driver);
+ auto driver = driverCtx_->driver;
+ if (driver->task()->enterSuspended(driver->state()) != facebook::velox::exec::StopReason::kNone) {
+ VELOX_FAIL("Terminate detected when entering suspended section");
+ }
hasNext = iterator_->hasNext();
+ if (driver->task()->leaveSuspended(driver->state()) != facebook::velox::exec::StopReason::kNone) {
+ LOG(WARNING) << "Terminate detected when leaving suspended section for driver " << driver->driverCtx()->driverId
+ << " from task " << driver->task()->taskId();
+ }
}
if (!hasNext) {
finished_ = true;
@@ -64,8 +73,16 @@ class RowVectorStream {
{
// We are leaving Velox task execution and are probably entering Spark code through JNI. Suspend the current
// driver to make the current task open to spilling.
- facebook::velox::exec::SuspendedSection ss(driverCtx_->driver);
+ // facebook::velox::exec::SuspendedSection ss(driverCtx_->driver);
+ auto driver = driverCtx_->driver;
+ if (driver->task()->enterSuspended(driver->state()) != facebook::velox::exec::StopReason::kNone) {
+ VELOX_FAIL("Terminate detected when entering suspended section");
+ }
cb = iterator_->next();
+ if (driver->task()->leaveSuspended(driver->state()) != facebook::velox::exec::StopReason::kNone) {
+ LOG(WARNING) << "Terminate detected when leaving suspended section for driver " << driver->driverCtx()->driverId
+ << " from task " << driver->task()->taskId();
+ }
}
const std::shared_ptr<VeloxColumnarBatch>& vb = VeloxColumnarBatch::from(pool_, cb);
auto vp = vb->getRowVector();
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 3ceccca4a3..e0cc5a184b 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -140,13 +140,6 @@ RowTypePtr getJoinOutputType(
VELOX_FAIL("Output should include left or right columns.");
}
-// Returns the path vector used to create Subfield.
-std::vector<std::unique_ptr<common::Subfield::PathElement>> getPath(const std::string& field) {
- std::vector<std::unique_ptr<common::Subfield::PathElement>> path;
- path.push_back(std::make_unique<common::Subfield::NestedField>(field));
- return path;
-}
-
} // namespace
core::PlanNodePtr SubstraitToVeloxPlanConverter::processEmit(
diff --git a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
index 68e79c80f5..804ba6c413 100644
--- a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
+++ b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
@@ -27,7 +27,6 @@
#include "substrait/SubstraitToVeloxPlan.h"
#include "substrait/VeloxToSubstraitPlan.h"
-#include "velox/functions/sparksql/Register.h"
#include "velox/vector/tests/utils/VectorTestBase.h"
#include "substrait/VariantToVectorConverter.h"
diff --git a/dev/vcpkg/vcpkg.json b/dev/vcpkg/vcpkg.json
index 66ba246f53..c0123cfbe9 100644
--- a/dev/vcpkg/vcpkg.json
+++ b/dev/vcpkg/vcpkg.json
@@ -90,7 +90,8 @@
"description": "Velox ABFS Support",
"dependencies": [
"azure-storage-blobs-cpp",
- "azure-storage-files-datalake-cpp"
+ "azure-storage-files-datalake-cpp",
+ "azure-identity-cpp"
]
},
"duckdb": {
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 62e7e33f82..962a3b415b 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
set -exu
VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_12_03
+VELOX_BRANCH=2024_12_04
VELOX_HOME=""
OS=`uname -s`
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]