This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new e97a533ada [GLUTEN-8397][CH][Part-1]: Disable HDFS while compiling the 
ClickHouse backend on macOS (#8400)
e97a533ada is described below

commit e97a533ada332d1f137045d5e65839ccc020000a
Author: Yan Xin <[email protected]>
AuthorDate: Mon Jan 6 20:18:53 2025 +0800

    [GLUTEN-8397][CH][Part-1]: Disable HDFS while compiling the ClickHouse backend 
on macOS (#8400)
    
    * [CH]: Disable HDFS while compiling the ClickHouse backend on macOS
    
    * Add macOS compile support without HDFS
    
    * Change unsafe cast to static_cast
---
 cpp-ch/local-engine/CMakeLists.txt                 | 30 ++++++++++++----------
 cpp-ch/local-engine/Common/GlutenSignalHandler.cpp |  4 ++-
 cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp  |  4 +--
 .../Storages/MergeTree/MetaDataHelper.cpp          |  4 +--
 .../Storages/Output/WriteBufferBuilder.cpp         |  4 +++
 .../Storages/SubstraitSource/CMakeLists.txt        | 14 ++++++----
 .../Storages/SubstraitSource/ReadBufferBuilder.cpp | 19 +++++++++++---
 cpp-ch/local-engine/local_engine_jni.cpp           |  2 +-
 8 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/cpp-ch/local-engine/CMakeLists.txt 
b/cpp-ch/local-engine/CMakeLists.txt
index 4b1c643636..f1819ff217 100644
--- a/cpp-ch/local-engine/CMakeLists.txt
+++ b/cpp-ch/local-engine/CMakeLists.txt
@@ -21,9 +21,12 @@ endif()
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections 
-fdata-sections")
-set(CMAKE_SHARED_LINKER_FLAGS
-    "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections")
-
+if(APPLE)
+  add_definitions(-D_GNU_SOURCE)
+else()
+  set(CMAKE_SHARED_LINKER_FLAGS
+      "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections")
+endif()
 if(COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
   set(CMAKE_SHARED_LINKER_FLAGS
       "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
@@ -157,17 +160,18 @@ target_link_libraries(
 
 target_link_libraries(${LOCALENGINE_SHARED_LIB} PUBLIC ch_parquet)
 
-if(ENABLE_JEMALLOC)
-  target_link_options(
-    ${LOCALENGINE_SHARED_LIB} PRIVATE
-    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch.map
-    -Wl,-Bsymbolic-functions)
-else()
-  target_link_options(
-    ${LOCALENGINE_SHARED_LIB} PRIVATE
-    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch-hide-jemalloc.map)
+if(NOT APPLE)
+  if(ENABLE_JEMALLOC)
+    target_link_options(
+      ${LOCALENGINE_SHARED_LIB} PRIVATE
+      -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch.map
+      -Wl,-Bsymbolic-functions)
+  else()
+    target_link_options(
+      ${LOCALENGINE_SHARED_LIB} PRIVATE
+      -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch-hide-jemalloc.map)
+  endif()
 endif()
-
 if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
   set(LOCALENGINE_SHARED_LIB_NAME "libchd.so")
 else()
diff --git a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp 
b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
index 6120eac9e9..9b7e1473e4 100644
--- a/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
+++ b/cpp-ch/local-engine/Common/GlutenSignalHandler.cpp
@@ -159,7 +159,9 @@ static void signalHandler(int sig, siginfo_t * info, void * 
context) noexcept
 
 /// Avoid link time dependency on DB/Interpreters - will use this function 
only when linked.
 __attribute__((__weak__)) void
-collectGlutenCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, 
const StackTrace & stack_trace);
+collectGlutenCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, 
const StackTrace & stack_trace) {
+    
+}
 
 class SignalListener : public Poco::Runnable
 {
diff --git a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp 
b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
index cf7056b576..e88db19895 100644
--- a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
+++ b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
@@ -964,10 +964,10 @@ jobject create(JNIEnv * env, const SparkRowInfo & 
spark_row_info)
 {
     auto * offsets_arr = env->NewLongArray(spark_row_info.getNumRows());
     const auto * offsets_src = spark_row_info.getOffsets().data();
-    env->SetLongArrayRegion(offsets_arr, 0, spark_row_info.getNumRows(), 
offsets_src);
+    env->SetLongArrayRegion(offsets_arr, 0, spark_row_info.getNumRows(), 
static_cast<const jlong *>(offsets_src));
     auto * lengths_arr = env->NewLongArray(spark_row_info.getNumRows());
     const auto * lengths_src = spark_row_info.getLengths().data();
-    env->SetLongArrayRegion(lengths_arr, 0, spark_row_info.getNumRows(), 
lengths_src);
+    env->SetLongArrayRegion(lengths_arr, 0, spark_row_info.getNumRows(), 
static_cast<const jlong *>(lengths_src));
     int64_t address = 
reinterpret_cast<int64_t>(spark_row_info.getBufferAddress());
     int64_t column_number = spark_row_info.getNumCols();
     int64_t total_size = spark_row_info.getTotalBytes();
diff --git a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp 
b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
index 1b4685e4ea..402932f5d7 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/MetaDataHelper.cpp
@@ -157,8 +157,8 @@ void restoreMetaData<LOCAL>(
             return;
 
         // Increase the speed of metadata recovery
-        auto max_concurrency = std::max(10UL, 
QueryContext::globalContext()->getSettingsRef()[Setting::max_threads].value);
-        auto max_threads = std::min(max_concurrency, not_exists_part.size());
+        auto max_concurrency = std::max(static_cast<UInt64>(10), 
QueryContext::globalContext()->getSettingsRef()[Setting::max_threads].value);
+        auto max_threads = std::min(max_concurrency, 
static_cast<UInt64>(not_exists_part.size()));
         FreeThreadPool thread_pool(
             CurrentMetrics::LocalThread,
             CurrentMetrics::LocalThreadActive,
diff --git a/cpp-ch/local-engine/Storages/Output/WriteBufferBuilder.cpp 
b/cpp-ch/local-engine/Storages/Output/WriteBufferBuilder.cpp
index c03b1918d2..fa2fc9d26a 100644
--- a/cpp-ch/local-engine/Storages/Output/WriteBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/Output/WriteBufferBuilder.cpp
@@ -21,7 +21,9 @@
 #include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
 #include <Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h>
 #include <Storages/Output/WriteBufferBuilder.h>
+#if USE_HDFS
 #include <hdfs/hdfs.h>
+#endif
 #include <Poco/URI.h>
 #include <Common/CHUtil.h>
 
@@ -101,7 +103,9 @@ void registerWriteBufferBuilders()
     auto & factory = WriteBufferBuilderFactory::instance();
     //TODO: support azure and S3
     factory.registerBuilder("file", [](DB::ContextPtr context_) { return 
std::make_shared<LocalFileWriteBufferBuilder>(context_); });
+#if USE_HDFS
     factory.registerBuilder("hdfs", [](DB::ContextPtr context_) { return 
std::make_shared<HDFSFileWriteBufferBuilder>(context_); });
+#endif
 }
 
 WriteBufferBuilderFactory & WriteBufferBuilderFactory::instance()
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt 
b/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt
index 228f54255c..62de10ec1d 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt
@@ -27,11 +27,15 @@ add_library(substrait_source ${substrait_source_sources})
 target_compile_options(
   substrait_source PRIVATE -Wno-suggest-destructor-override
                            -Wno-inconsistent-missing-destructor-override)
-
-target_link_libraries(
-  substrait_source PUBLIC boost::headers_only ch_contrib::protobuf
-                          clickhouse_common_io ch_contrib::hdfs substrait)
-
+if(ENABLE_HDFS)
+  target_link_libraries(
+    substrait_source PUBLIC boost::headers_only ch_contrib::protobuf
+                            clickhouse_common_io ch_contrib::hdfs substrait)
+else()
+  target_link_libraries(
+    substrait_source PUBLIC boost::headers_only ch_contrib::protobuf
+                            clickhouse_common_io substrait)
+endif()
 target_include_directories(
   substrait_source SYSTEM BEFORE
   PUBLIC ${ARROW_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/arrow-cmake/cpp/src
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp 
b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
index 732518ab77..b7beeb52ca 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ReadBufferBuilder.cpp
@@ -43,7 +43,6 @@
 #include <Storages/SubstraitSource/ReadBufferBuilder.h>
 #include <Storages/SubstraitSource/SubstraitFileSource.h>
 #include <boost/compute/detail/lru_cache.hpp>
-#include <hdfs/hdfs.h>
 #include <sys/stat.h>
 #include <Poco/Logger.h>
 #include <Poco/URI.h>
@@ -64,6 +63,9 @@
 #include <aws/s3/model/ListObjectsV2Request.h>
 #endif
 
+#if USE_HDFS
+#include <hdfs/hdfs.h>
+#endif
 
 namespace DB
 {
@@ -205,12 +207,15 @@ 
adjustReadRangeIfNeeded(std::unique_ptr<SeekableReadBuffer> read_buffer, const s
         file_info.start() + file_info.length(),
         start_end.first,
         start_end.second);
-
+#if USE_HDFS
     /// If read buffer doesn't support right bounded reads, wrap it with 
BoundedReadBuffer to enable right bounded reads.
     if (dynamic_cast<DB::ReadBufferFromHDFS *>(read_buffer.get()) || 
dynamic_cast<DB::AsynchronousReadBufferFromHDFS *>(read_buffer.get())
         || dynamic_cast<DB::ReadBufferFromFile *>(read_buffer.get()))
         read_buffer = 
std::make_unique<DB::BoundedReadBuffer>(std::move(read_buffer));
-
+#else 
+    if (dynamic_cast<DB::ReadBufferFromFile *>(read_buffer.get()))
+        read_buffer = 
std::make_unique<DB::BoundedReadBuffer>(std::move(read_buffer));
+#endif
     read_buffer->seek(start_end.first, SEEK_SET);
     read_buffer->setReadUntilPosition(start_end.second);
     return std::move(read_buffer);
@@ -744,12 +749,18 @@ 
ReadBufferBuilder::wrapWithBzip2(std::unique_ptr<DB::ReadBuffer> in, const subst
         new_end);
 
     std::unique_ptr<SeekableReadBuffer> bounded_in;
+#if USE_HDFS
     if (dynamic_cast<DB::ReadBufferFromHDFS *>(seekable_in.get()) || 
dynamic_cast<DB::AsynchronousReadBufferFromHDFS *>(seekable_in.get())
         || dynamic_cast<DB::ReadBufferFromFile *>(seekable_in.get()))
         bounded_in = 
std::make_unique<BoundedReadBuffer>(std::move(seekable_in));
     else
         bounded_in = std::move(seekable_in);
-
+#else
+    if (dynamic_cast<DB::ReadBufferFromFile *>(seekable_in.get()))
+        bounded_in = 
std::make_unique<BoundedReadBuffer>(std::move(seekable_in));
+    else
+        bounded_in = std::move(seekable_in);
+#endif
     bounded_in->seek(new_start, SEEK_SET);
     bounded_in->setReadUntilPosition(new_end);
     bool first_block_need_special_process = (new_start > 0);
diff --git a/cpp-ch/local-engine/local_engine_jni.cpp 
b/cpp-ch/local-engine/local_engine_jni.cpp
index c39c8925f8..855d2b8371 100644
--- a/cpp-ch/local-engine/local_engine_jni.cpp
+++ b/cpp-ch/local-engine/local_engine_jni.cpp
@@ -1039,7 +1039,7 @@ JNIEXPORT jobject 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
     local_engine::BlockStripes bs = 
local_engine::BlockStripeSplitter::split(*block, partition_col_indice_vec, 
hasBucket, reserve_);
 
     auto * addresses = env->NewLongArray(bs.block_addresses.size());
-    env->SetLongArrayRegion(addresses, 0, bs.block_addresses.size(), 
bs.block_addresses.data());
+    env->SetLongArrayRegion(addresses, 0, bs.block_addresses.size(), 
static_cast<const jlong *>(bs.block_addresses.data()));
     auto * indices = env->NewIntArray(bs.heading_row_indice.size());
     env->SetIntArrayRegion(indices, 0, bs.heading_row_indice.size(), 
bs.heading_row_indice.data());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to