This is an automated email from the ASF dual-hosted git repository.
JkSelf pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9acd3d7015 [GLUTEN-12022][VL] Extend S3 filesystem registration in
Gluten (#12026)
9acd3d7015 is described below
commit 9acd3d70155d187afdb2b7b9d7b337f327426585
Author: Reema <[email protected]>
AuthorDate: Fri Jun 5 00:58:41 2026 +0000
[GLUTEN-12022][VL] Extend S3 filesystem registration in Gluten (#12026)
---
cpp/core/utils/tac/ffor.hpp | 3 +-
cpp/velox/CMakeLists.txt | 1 +
cpp/velox/compute/VeloxBackend.cc | 8 +++--
cpp/velox/filesystem/GlutenS3FileSystem.cc | 54 ++++++++++++++++++++++++++++
cpp/velox/filesystem/GlutenS3FileSystem.h | 44 +++++++++++++++++++++++
cpp/velox/tests/CMakeLists.txt | 3 ++
cpp/velox/tests/GlutenS3FileSystemTest.cc | 57 ++++++++++++++++++++++++++++++
7 files changed, 165 insertions(+), 5 deletions(-)
diff --git a/cpp/core/utils/tac/ffor.hpp b/cpp/core/utils/tac/ffor.hpp
index 761a1ec262..0d632efff5 100644
--- a/cpp/core/utils/tac/ffor.hpp
+++ b/cpp/core/utils/tac/ffor.hpp
@@ -437,8 +437,7 @@ inline size_t decompress64Impl(const uint8_t* input, size_t inputSize, uint64_t*
if (bw == kBwTailMarker) {
if (count > 0) {
// memcpy handles any alignment, no special case needed.
- std::memcpy(
- reinterpret_cast<uint8_t*>(output) + nDecoded * sizeof(uint64_t), inPtr, count * sizeof(uint64_t));
+ std::memcpy(reinterpret_cast<uint8_t*>(output) + nDecoded * sizeof(uint64_t), inPtr, count * sizeof(uint64_t));
nDecoded += count;
}
break;
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index d0a59da032..2fcc6b9e51 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -211,6 +211,7 @@ set(VELOX_SRCS
utils/VeloxWriterUtils.cc)
if(ENABLE_S3)
+ list(APPEND VELOX_SRCS filesystem/GlutenS3FileSystem.cc)
find_package(ZLIB)
endif()
diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc
index 801fc9d835..78b9f5ee04 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -40,6 +40,9 @@
#include "compute/VeloxRuntime.h"
#include "config/VeloxConfig.h"
+#ifdef ENABLE_S3
+#include "filesystem/GlutenS3FileSystem.h"
+#endif
#include "jni/JniFileSystem.h"
#include "memory/GlutenBufferedInputBuilder.h"
#include "operators/functions/SparkExprToSubfieldFilterParser.h"
@@ -56,7 +59,6 @@
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" // @manual
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.h"
#include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" // @manual
-#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h" // @manual
#include "velox/dwio/orc/reader/OrcReader.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
@@ -156,7 +158,7 @@ void VeloxBackend::init(
velox::filesystems::registerHdfsFileSystem();
#endif
#ifdef ENABLE_S3
- velox::filesystems::registerS3FileSystem();
+ registerGlutenS3FileSystem();
#endif
#ifdef ENABLE_GCS
velox::filesystems::registerGcsFileSystem();
@@ -377,7 +379,7 @@ void VeloxBackend::tearDown() {
}
#endif
#ifdef ENABLE_S3
- velox::filesystems::finalizeS3FileSystem();
+ finalizeGlutenS3FileSystem();
#endif
// Destruct IOThreadPoolExecutor will join all threads.
diff --git a/cpp/velox/filesystem/GlutenS3FileSystem.cc b/cpp/velox/filesystem/GlutenS3FileSystem.cc
new file mode 100644
index 0000000000..008da2d7ae
--- /dev/null
+++ b/cpp/velox/filesystem/GlutenS3FileSystem.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem/GlutenS3FileSystem.h"
+
+#include <memory>
+#include <utility>
+
+#include "velox/common/file/File.h"
+
+namespace gluten {
+
+namespace velox = facebook::velox;
+namespace filesystems = facebook::velox::filesystems;
+
+namespace {
+
+std::shared_ptr<filesystems::FileSystem> glutenS3FileSystemFactory( // Factory that registerGlutenS3FileSystem() hands to filesystems::registerS3FileSystem().
+ std::string_view bucketName,
+ std::shared_ptr<const velox::config::ConfigBase> config) {
+ return std::make_shared<GlutenS3FileSystem>(bucketName, config); // Builds the Gluten subclass; the caller receives it through the FileSystem base pointer.
+}
+
+} // namespace
+
+std::unique_ptr<velox::WriteFile> GlutenS3FileSystem::openFileForWrite( // Override of S3FileSystem::openFileForWrite; currently adds no behavior of its own.
+ std::string_view s3Path,
+ const filesystems::FileOptions& options) {
+ return filesystems::S3FileSystem::openFileForWrite(s3Path, options); // Forwards both arguments unchanged to the base implementation.
+}
+
+void registerGlutenS3FileSystem(filesystems::CacheKeyFn cacheKeyFunc) { // Registers S3 with Velox, supplying glutenS3FileSystemFactory as the factory.
+ filesystems::registerS3FileSystem(std::move(cacheKeyFunc),
glutenS3FileSystemFactory);
+}
+
+void finalizeGlutenS3FileSystem() { // Teardown counterpart to registerGlutenS3FileSystem().
+ filesystems::finalizeS3FileSystem(); // Delegates directly to Velox's S3 finalizer.
+}
+
+} // namespace gluten
diff --git a/cpp/velox/filesystem/GlutenS3FileSystem.h b/cpp/velox/filesystem/GlutenS3FileSystem.h
new file mode 100644
index 0000000000..104eb1abbc
--- /dev/null
+++ b/cpp/velox/filesystem/GlutenS3FileSystem.h
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string_view>
+
+#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
+#include "velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.h"
+
+namespace gluten {
+
+namespace velox = facebook::velox;
+
+class GlutenS3FileSystem : public velox::filesystems::S3FileSystem { // Gluten's subclass of Velox's S3FileSystem.
+ public:
+ GlutenS3FileSystem(std::string_view bucketName, const std::shared_ptr<const
velox::config::ConfigBase>& config)
+ : S3FileSystem(bucketName, config) {} // Both arguments go straight to the base-class constructor.
+
+ std::unique_ptr<velox::WriteFile> openFileForWrite( // Overrides the base-class entry point for opening an S3 path for writing.
+ std::string_view s3Path,
+ const velox::filesystems::FileOptions& options) override;
+};
+
+void registerGlutenS3FileSystem(velox::filesystems::CacheKeyFn cacheKeyFunc =
nullptr); // Registers the S3 filesystem with Velox using a factory that creates GlutenS3FileSystem; cacheKeyFunc may be omitted.
+
+void finalizeGlutenS3FileSystem(); // Finalizes the S3 filesystem by calling Velox's finalizeS3FileSystem().
+
+} // namespace gluten
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index 00c0c2df69..ebcac56bc5 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -137,6 +137,9 @@ add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc
add_velox_test(runtime_test SOURCES RuntimeTest.cc)
add_velox_test(velox_memory_test SOURCES MemoryManagerTest.cc)
add_velox_test(buffer_outputstream_test SOURCES BufferOutputStreamTest.cc)
+if(ENABLE_S3)
+ add_velox_test(gluten_s3_file_system_test SOURCES GlutenS3FileSystemTest.cc)
+endif()
add_velox_test(scoped_timer_test SOURCES ScopedTimerTest.cc)
if(BUILD_EXAMPLES)
add_velox_test(my_udf_test SOURCES MyUdfTest.cc)
diff --git a/cpp/velox/tests/GlutenS3FileSystemTest.cc b/cpp/velox/tests/GlutenS3FileSystemTest.cc
new file mode 100644
index 0000000000..00cf3eef24
--- /dev/null
+++ b/cpp/velox/tests/GlutenS3FileSystemTest.cc
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem/GlutenS3FileSystem.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "velox/common/config/Config.h"
+#include "velox/common/file/FileSystems.h"
+#include "velox/connectors/hive/storage_adapters/s3fs/S3Config.h"
+
+namespace gluten {
+namespace {
+
+namespace velox = facebook::velox;
+namespace filesystems = facebook::velox::filesystems;
+
+TEST(GlutenS3FileSystemTest, registeredFileSystemUsesGlutenSubclass) { // Checks that an s3:// lookup yields a GlutenS3FileSystem after registration.
+ registerGlutenS3FileSystem(); // Uses the default (nullptr) cache-key function.
+
+ auto config =
std::make_shared<velox::config::ConfigBase>(std::unordered_map<std::string,
std::string>{
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kEndpoint),
"http://127.0.0.1:9000"},
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kAccessKey),
"access"},
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kSecretKey),
"secret"},
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kSSLEnabled),
"false"},
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kPathStyleAccess),
"true"},
+
{filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kIMDSEnabled),
"false"}});
+
+ auto fileSystem = filesystems::getFileSystem("s3://gluten-test-bucket/test",
config);
+
+ EXPECT_NE(dynamic_cast<GlutenS3FileSystem*>(fileSystem.get()), nullptr); // The registered factory must have produced the Gluten subclass.
+ EXPECT_EQ(fileSystem->name(), "S3"); // The reported filesystem name is expected to be "S3".
+
+ fileSystem.reset(); // Release the filesystem instance before finalizing S3.
+ finalizeGlutenS3FileSystem();
+}
+
+} // namespace
+} // namespace gluten
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]