This is an automated email from the ASF dual-hosted git repository.

JkSelf pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 9acd3d7015 [GLUTEN-12022][VL] Extend S3 filesystem registration in Gluten (#12026)
9acd3d7015 is described below

commit 9acd3d70155d187afdb2b7b9d7b337f327426585
Author: Reema <[email protected]>
AuthorDate: Fri Jun 5 00:58:41 2026 +0000

    [GLUTEN-12022][VL] Extend S3 filesystem registration in Gluten (#12026)
---
 cpp/core/utils/tac/ffor.hpp                |  3 +-
 cpp/velox/CMakeLists.txt                   |  1 +
 cpp/velox/compute/VeloxBackend.cc          |  8 +++--
 cpp/velox/filesystem/GlutenS3FileSystem.cc | 54 ++++++++++++++++++++++++++++
 cpp/velox/filesystem/GlutenS3FileSystem.h  | 44 +++++++++++++++++++++++
 cpp/velox/tests/CMakeLists.txt             |  3 ++
 cpp/velox/tests/GlutenS3FileSystemTest.cc  | 57 ++++++++++++++++++++++++++++++
 7 files changed, 165 insertions(+), 5 deletions(-)

diff --git a/cpp/core/utils/tac/ffor.hpp b/cpp/core/utils/tac/ffor.hpp
index 761a1ec262..0d632efff5 100644
--- a/cpp/core/utils/tac/ffor.hpp
+++ b/cpp/core/utils/tac/ffor.hpp
@@ -437,8 +437,7 @@ inline size_t decompress64Impl(const uint8_t* input, size_t inputSize, uint64_t*
     if (bw == kBwTailMarker) {
       if (count > 0) {
         // memcpy handles any alignment, no special case needed.
-        std::memcpy(
-            reinterpret_cast<uint8_t*>(output) + nDecoded * sizeof(uint64_t), inPtr, count * sizeof(uint64_t));
+        std::memcpy(reinterpret_cast<uint8_t*>(output) + nDecoded * sizeof(uint64_t), inPtr, count * sizeof(uint64_t));
         nDecoded += count;
       }
       break;
diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt
index d0a59da032..2fcc6b9e51 100644
--- a/cpp/velox/CMakeLists.txt
+++ b/cpp/velox/CMakeLists.txt
@@ -211,6 +211,7 @@ set(VELOX_SRCS
     utils/VeloxWriterUtils.cc)
 
 if(ENABLE_S3)
+  list(APPEND VELOX_SRCS filesystem/GlutenS3FileSystem.cc)
   find_package(ZLIB)
 endif()
 
diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc
index 801fc9d835..78b9f5ee04 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -40,6 +40,9 @@
 
 #include "compute/VeloxRuntime.h"
 #include "config/VeloxConfig.h"
+#ifdef ENABLE_S3
+#include "filesystem/GlutenS3FileSystem.h"
+#endif
 #include "jni/JniFileSystem.h"
 #include "memory/GlutenBufferedInputBuilder.h"
 #include "operators/functions/SparkExprToSubfieldFilterParser.h"
@@ -56,7 +59,6 @@
 #include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" // @manual
 #include "velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.h"
 #include "velox/connectors/hive/storage_adapters/hdfs/RegisterHdfsFileSystem.h" // @manual
-#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h" // @manual
 #include "velox/dwio/orc/reader/OrcReader.h"
 #include "velox/dwio/parquet/RegisterParquetReader.h"
 #include "velox/dwio/parquet/RegisterParquetWriter.h"
@@ -156,7 +158,7 @@ void VeloxBackend::init(
   velox::filesystems::registerHdfsFileSystem();
 #endif
 #ifdef ENABLE_S3
-  velox::filesystems::registerS3FileSystem();
+  registerGlutenS3FileSystem();
 #endif
 #ifdef ENABLE_GCS
   velox::filesystems::registerGcsFileSystem();
@@ -377,7 +379,7 @@ void VeloxBackend::tearDown() {
   }
 #endif
 #ifdef ENABLE_S3
-  velox::filesystems::finalizeS3FileSystem();
+  finalizeGlutenS3FileSystem();
 #endif
 
   // Destruct IOThreadPoolExecutor will join all threads.
diff --git a/cpp/velox/filesystem/GlutenS3FileSystem.cc b/cpp/velox/filesystem/GlutenS3FileSystem.cc
new file mode 100644
index 0000000000..008da2d7ae
--- /dev/null
+++ b/cpp/velox/filesystem/GlutenS3FileSystem.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem/GlutenS3FileSystem.h"
+
+#include <memory>
+#include <utility>
+
+#include "velox/common/file/File.h"
+
+namespace gluten {
+
+namespace velox = facebook::velox;
+namespace filesystems = facebook::velox::filesystems;
+
+namespace {
+
+std::shared_ptr<filesystems::FileSystem> glutenS3FileSystemFactory(
+    std::string_view bucketName,
+    std::shared_ptr<const velox::config::ConfigBase> config) {
+  return std::make_shared<GlutenS3FileSystem>(bucketName, config);
+}
+
+} // namespace
+
+std::unique_ptr<velox::WriteFile> GlutenS3FileSystem::openFileForWrite(
+    std::string_view s3Path,
+    const filesystems::FileOptions& options) {
+  return filesystems::S3FileSystem::openFileForWrite(s3Path, options);
+}
+
+void registerGlutenS3FileSystem(filesystems::CacheKeyFn cacheKeyFunc) {
+  filesystems::registerS3FileSystem(std::move(cacheKeyFunc), glutenS3FileSystemFactory);
+}
+
+void finalizeGlutenS3FileSystem() {
+  filesystems::finalizeS3FileSystem();
+}
+
+} // namespace gluten
diff --git a/cpp/velox/filesystem/GlutenS3FileSystem.h b/cpp/velox/filesystem/GlutenS3FileSystem.h
new file mode 100644
index 0000000000..104eb1abbc
--- /dev/null
+++ b/cpp/velox/filesystem/GlutenS3FileSystem.h
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string_view>
+
+#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
+#include "velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.h"
+
+namespace gluten {
+
+namespace velox = facebook::velox;
+
+class GlutenS3FileSystem : public velox::filesystems::S3FileSystem {
+ public:
+  GlutenS3FileSystem(std::string_view bucketName, const std::shared_ptr<const velox::config::ConfigBase>& config)
+      : S3FileSystem(bucketName, config) {}
+
+  std::unique_ptr<velox::WriteFile> openFileForWrite(
+      std::string_view s3Path,
+      const velox::filesystems::FileOptions& options) override;
+};
+
+void registerGlutenS3FileSystem(velox::filesystems::CacheKeyFn cacheKeyFunc = nullptr);
+
+void finalizeGlutenS3FileSystem();
+
+} // namespace gluten
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index 00c0c2df69..ebcac56bc5 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -137,6 +137,9 @@ add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc
 add_velox_test(runtime_test SOURCES RuntimeTest.cc)
 add_velox_test(velox_memory_test SOURCES MemoryManagerTest.cc)
 add_velox_test(buffer_outputstream_test SOURCES BufferOutputStreamTest.cc)
+if(ENABLE_S3)
+  add_velox_test(gluten_s3_file_system_test SOURCES GlutenS3FileSystemTest.cc)
+endif()
 add_velox_test(scoped_timer_test SOURCES ScopedTimerTest.cc)
 if(BUILD_EXAMPLES)
   add_velox_test(my_udf_test SOURCES MyUdfTest.cc)
diff --git a/cpp/velox/tests/GlutenS3FileSystemTest.cc b/cpp/velox/tests/GlutenS3FileSystemTest.cc
new file mode 100644
index 0000000000..00cf3eef24
--- /dev/null
+++ b/cpp/velox/tests/GlutenS3FileSystemTest.cc
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filesystem/GlutenS3FileSystem.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "velox/common/config/Config.h"
+#include "velox/common/file/FileSystems.h"
+#include "velox/connectors/hive/storage_adapters/s3fs/S3Config.h"
+
+namespace gluten {
+namespace {
+
+namespace velox = facebook::velox;
+namespace filesystems = facebook::velox::filesystems;
+
+TEST(GlutenS3FileSystemTest, registeredFileSystemUsesGlutenSubclass) {
+  registerGlutenS3FileSystem();
+
+  auto config = std::make_shared<velox::config::ConfigBase>(std::unordered_map<std::string, std::string>{
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kEndpoint), "http://127.0.0.1:9000"},
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kAccessKey), "access"},
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kSecretKey), "secret"},
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kSSLEnabled), "false"},
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kPathStyleAccess), "true"},
+      {filesystems::S3Config::baseConfigKey(filesystems::S3Config::Keys::kIMDSEnabled), "false"}});
+
+  auto fileSystem = filesystems::getFileSystem("s3://gluten-test-bucket/test", config);
+
+  EXPECT_NE(dynamic_cast<GlutenS3FileSystem*>(fileSystem.get()), nullptr);
+  EXPECT_EQ(fileSystem->name(), "S3");
+
+  fileSystem.reset();
+  finalizeGlutenS3FileSystem();
+}
+
+} // namespace
+} // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to