This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 88dcdfd46 IMPALA-12807: Add support for mold linker
88dcdfd46 is described below

commit 88dcdfd4662d0ec4a6e0d81d643d207f6e5ed900
Author: Joe McDonnell <[email protected]>
AuthorDate: Wed Feb 14 21:57:54 2024 -0800

    IMPALA-12807: Add support for mold linker
    
    This adds support for using the mold linker. It changes
    the existing USE_GOLD_LINKER environment variable to
    IMPALA_LINKER, which accepts ld, gold, or mold as
    values. It defaults to 'gold' to match current behavior.
    Developers can override it in bin/impala-config-local.sh.
    
    Clang does not implement -gz properly until version 12.
    Earlier versions do not enable compressed debuginfo in the
    final binary. IMPALA_LINKER=mold doesn't work with
    IMPALA_COMPRESSED_DEBUG_INFO=true on Clang due to this.
    This change detects Clang <12 and skips -gz as it is ineffective.
    
    Mold follows similar behavior to LLD and requires
    --exclude-libs to use the full library name (i.e.
    liblz4.a rather than liblz4). Gold will happily
    accept the full library name, so this changes to use
    the full library name.
    
    Mold is much faster for incremental builds on my system:
    (e.g. touch be/src/scheduling/scheduler.cc && make -j8 impalad)
    gold: 15.8s
    mold: 2.6s
    
    Testing:
     - Ran builds with IMPALA_LINKER=mold on CentOS 7, Red Hat 8,
       and Ubuntu 20.
    
    Change-Id: Ia9e9accd06b6ecd182d200d81afaae09a885c241
    Reviewed-on: http://gerrit.cloudera.org:8080/21121
    Reviewed-by: Michael Smith <[email protected]>
    Reviewed-by: Andrew Sherman <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/CMakeLists.txt             | 28 +++++++++++++++++++++++-----
 be/src/service/CMakeLists.txt | 12 +++++++-----
 bin/bootstrap_toolchain.py    |  6 +++---
 bin/impala-config.sh          |  9 +++++++--
 4 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 9accfa8b1..789f0fa18 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -27,6 +27,13 @@ PROJECT(ASSEMBLER)
 
 option(BUILD_WITH_NO_TESTS "Do not generate test and benchmark targets" OFF)
 
+# Validate the IMPALA_LINKER environment variable
+if (NOT "$ENV{IMPALA_LINKER}" STREQUAL "ld" AND
+    NOT "$ENV{IMPALA_LINKER}" STREQUAL "gold" AND
+    NOT "$ENV{IMPALA_LINKER}" STREQUAL "mold")
+  message(FATAL_ERROR "Invalid IMPALA_LINKER: $ENV{IMPALA_LINKER} (expected: 
ld, gold, or mold)")
+endif()
+
 # compiler flags that are common across debug/release builds
 #  -Wall: Enable all warnings.
 #  -Wno-sign-compare: suppress warnings for comparison between signed and 
unsigned
@@ -58,6 +65,14 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} 
-DBOOST_ALLOW_DEPRECATED_HEADERS")
 #      built at OS where getrandom(2) is available at OSes where getrandom(2)
 #      isn't supported (e.g., that might happen in containerized deployments).
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} 
-DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX")
+IF($ENV{IMPALA_LINKER} STREQUAL "mold")
+  # Only very recent GCC 12+ has support for -fuse-ld=mold, so we override 
"ld" by
+  # putting Mold's libexec/mold directory (which has a "ld" symlink) on the 
path.
+  SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B 
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/mold-$ENV{IMPALA_MOLD_VERSION}/libexec/mold")
+ENDIF()
+# Note: apart from gold linker, binutils provides an up-to-date "as" utility. 
Older
+# distributions will have an "as" utility too old to process the output from
+# modern GCC.
 SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B 
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/binutils-$ENV{IMPALA_BINUTILS_VERSION}/bin/")
 #  -Wno-deprecated-declarations: OpenSSL3 deprecated various APIs currently 
used by
 #      Impala, so this disables those warnings when using OpenSSL3 until they 
can be
@@ -65,7 +80,7 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B 
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME
 if (OPENSSL_VERSION VERSION_GREATER_EQUAL 3)
   SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
 endif()
-IF($ENV{USE_GOLD_LINKER} STREQUAL "true")
+IF($ENV{IMPALA_LINKER} STREQUAL "gold")
   SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fuse-ld=gold")
 ENDIF()
 
@@ -245,11 +260,14 @@ endif()
 # debug info in the executable. This can reduce the size of binaries by >50%
 # without changing the amount of debug information. gdb is known to work
 # with compressed debug info, but other tools may not know how to use it.
-# TODO: The current version of Clang does not handles this flag correctly and
-# simply produces binaries with uncompressed debug info. This needs further
-# debugging.
 if ($ENV{IMPALA_COMPRESSED_DEBUG_INFO} STREQUAL "true")
-  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gz")
+  # Clang doesn't handle -gz properly until version 12, so there is no reason 
to keep it.
+  if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"
+      AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.0)
+    message(STATUS "Detected Clang < 12: -gz is ineffective on this version, 
skipping.")
+  else()
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gz")
+  endif()
 endif()
 
 # Use ccache when found and not explicitly disabled by setting the 
DISABLE_CCACHE envvar.
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index a6b51395e..dbe2d181c 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -58,11 +58,13 @@ add_library(fesupport SHARED
 # Hide all symbols from compression libraries so that users of this 
libfesupport.so
 # don't have conflicts with libhadoop or other libraries.
 # TODO: This should also hide most other symbols as a precaution.
-set(LINK_FLAGS "-Wl,--exclude-libs=libbz2")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=liblz4")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libsnappy")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libz")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libzstd")
+# Note: Using the full library name with ".a" is needed for lld/mold linkers.
+# It has no impact on gold, which supports with or without the .a.
+set(LINK_FLAGS "-Wl,--exclude-libs=libbz2.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=liblz4.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libsnappy.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libz.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libzstd.a")
 set_target_properties(fesupport
   PROPERTIES
   LINK_FLAGS "${LINK_FLAGS}")
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 2162bc1db..171aec497 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -474,9 +474,9 @@ def get_toolchain_downloads():
   toolchain_packages += [ToolchainPackage(p) for p in
       ["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack", 
"cctz",
        "cloudflarezlib", "cmake", "crcutil", "curl", "flatbuffers", "gdb", 
"gflags",
-       "glog", "gperftools", "gtest", "jwt-cpp", "libev", "libunwind", "lz4", 
"openldap",
-       "orc", "protobuf", "python", "rapidjson", "re2", "snappy", "tpc-h", 
"tpc-ds",
-       "zlib", "zstd"]]
+       "glog", "gperftools", "gtest", "jwt-cpp", "libev", "libunwind", "lz4", 
"mold",
+       "openldap", "orc", "protobuf", "python", "rapidjson", "re2", "snappy", 
"tpc-h",
+       "tpc-ds", "zlib", "zstd"]]
   python3_package = ToolchainPackage(
       "python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
   toolchain_packages += [python3_package]
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 98c9dda46..30ceb6d1a 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -207,6 +207,8 @@ if [[ $ARCH_NAME == 'aarch64' ]]; then
   export IMPALA_HADOOP_CLIENT_VERSION=3.3.6
   unset IMPALA_HADOOP_CLIENT_URL
 fi
+export IMPALA_MOLD_VERSION=2.4.1
+unset IMPALA_MOLD_URL
 
 # Impala JDBC driver for testing.
 export IMPALA_SIMBA_JDBC_DRIVER_VERSION=42-2.6.32.1041
@@ -531,8 +533,11 @@ chmod 755 "${PYTHON_EGG_CACHE}"
 # If it's 0, Impala will be built with the compiler in the toolchain directory.
 export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0}
 
-# Use ld.gold instead of ld by default to speed up builds.
-export USE_GOLD_LINKER=${USE_GOLD_LINKER-true}
+# Allow the linker to be set to gold, mold, or regular ld. Gold is the default
+# as it has been for a long time. Mold is a new linker that is faster than 
gold.
+# Note: This is validated in the CMake code.
+# TODO: Add support for lld as well
+export IMPALA_LINKER=${IMPALA_LINKER-gold}
 
 # Override the default compiler by setting a path to the new compiler. The 
default
 # compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use 
case

Reply via email to