This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new cff286e75 IMPALA-9999: Switch to GCC 10.4
cff286e75 is described below
commit cff286e7512e9d1e2ff2b4ea033d3e575f54b353
Author: Joe McDonnell <[email protected]>
AuthorDate: Fri Jan 7 18:01:48 2022 -0800
IMPALA-9999: Switch to GCC 10.4
This upgrades GCC and libstdc++ to version 10.4. This
required patching or upgrading several dependencies
so they could compile with GCC 10. The toolchain
companion change has details on what items needed
to be upgraded and why.
The toolchain companion change switches GCC to build
with toolchain binutils rather than host binutils. This
means that the python virtualenv initialization needs
to include binutils on the path.
This disables two warnings introduced in the new GCC
versions (Wclass-memaccess and Winit-list-lifetime).
These two warnings occur in our code and also in
dependencies like LLVM and rapidjson. These are not
critical warnings, so they can be addressed
independently and reenabled later.
Binary sizes increase, particulary when including
debug symbols:
| GCC 7.5 | GCC 10.4
impalad RELEASE stripped | 83204768 | 88702824
impalad RELEASE | 707278904 | 971711456
impalad DEBUG stripped | 106677672 | 97391944
impalad DEBUG | 725864760 | 867647512
Testing:
- Multiple test jobs (core, release exhaustive, ASAN)
- Performance testing for TPC-H and TPC-DS shows
a modest improvement (2-4%).
- Code compiles without warnings on debug and release
Change-Id: Ibe6857b822925226d39fd4d6413457ef6bbaabec
Reviewed-on: http://gerrit.cloudera.org:8080/18134
Reviewed-by: Michael Smith <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Csaba Ringhofer <[email protected]>
---
be/CMakeLists.txt | 10 ++++++++++
be/src/codegen/llvm-codegen-test.cc | 10 +++++++---
be/src/runtime/string-value.inline.h | 6 +++++-
be/src/udf/udf.cc | 7 +++++--
be/src/util/parquet-reader.cc | 5 +++++
bin/impala-config.sh | 24 ++++++++++++------------
fe/pom.xml | 4 ++--
infra/python/bootstrap_virtualenv.py | 5 +++++
8 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index fa8341feb..77d5c3050 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -101,6 +101,16 @@ SET(CXX_CLANG_FLAGS "${CXX_CLANG_FLAGS} -DCALLONCEHACK")
# versions of GDB. DWARF 4 requires GDB 7.0 or above.
# -Wno-maybe-unitialized: Do not warn for variables that might be
uninitialized
SET(CXX_GCC_FLAGS "-g -Wno-unused-local-typedefs -gdwarf-4
-Wno-maybe-uninitialized")
+# There are some GCC warnings added in recent versions that current code hits.
+# These can be addressed over time, because they also appear in the headers of
+# some of our dependencies:
+# -Wno-class-memaccess: This warning was added in GCC 8. This impacts a lot
of
+# locations (e.g. Tuple and TimestampValue) as well as rapidjson. This
warning
+# doesn't seem particularly useful for us.
+# -Wno-init-list-lifetime: This warning was added in GCC 9, and several code
pieces
+# are not clean yet (including some LLVM code).
+# TODO: These should be cleaned up and reenabled.
+SET(CXX_GCC_FLAGS "${CXX_GCC_FLAGS} -Wno-class-memaccess
-Wno-init-list-lifetime")
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
# We need to add additional arguments for GCC 7+. We go down this branch if
building
# with a non-GCC compiler of version 7+, but in that case CXX_GCC_FLAGS is
not used,
diff --git a/be/src/codegen/llvm-codegen-test.cc
b/be/src/codegen/llvm-codegen-test.cc
index e1704506a..0e33aa912 100644
--- a/be/src/codegen/llvm-codegen-test.cc
+++ b/be/src/codegen/llvm-codegen-test.cc
@@ -399,9 +399,13 @@ TEST_F(LlvmCodeGenTest, StringValue) {
EXPECT_EQ(static_cast<void*>(str_val.ptr), static_cast<const
void*>(str.c_str()));
// After IMPALA-7367 removed the padding from the StringValue struct,
validate the
- // length byte alone.
- int32_t* bytes = reinterpret_cast<int32_t*>(&str_val);
- EXPECT_EQ(1, bytes[2]); // str_val.len
+ // length byte alone. To avoid warnings about constructing a pointer into a
packed
+ // struct (Waddress-of-packed-member), this needs to copy the bytes out to a
+ // temporary variable.
+ int8_t* bytes = reinterpret_cast<int8_t*>(&str_val);
+ int32_t len = 0;
+ memcpy(static_cast<void*>(&len), static_cast<void*>(&bytes[8]),
sizeof(int32_t));
+ EXPECT_EQ(1, len); // str_val.len
codegen->Close();
}
diff --git a/be/src/runtime/string-value.inline.h
b/be/src/runtime/string-value.inline.h
index d76baee85..d1d44c839 100644
--- a/be/src/runtime/string-value.inline.h
+++ b/be/src/runtime/string-value.inline.h
@@ -37,7 +37,11 @@ namespace impala {
/// - len: min(n1, n2) - this can be more cheaply passed in by the caller
static inline int StringCompare(const char* s1, int n1, const char* s2, int
n2, int len) {
// memcmp has undefined behavior when called on nullptr for either pointer
- const int result = (len == 0) ? 0 : memcmp(s1, s2, len);
+ //
+ // GCC gives a warning about overflowing the size argument of memcmp, because
+ // it thinks 'len' can be negative. 'len' is never negative, so, this just
uses
+ // len <= 0 and returns 0 for that case to avoid the warning.
+ const int result = (len <= 0) ? 0 : memcmp(s1, s2, len);
if (result != 0) return result;
return n1 - n2;
}
diff --git a/be/src/udf/udf.cc b/be/src/udf/udf.cc
index e636f8193..409b71994 100644
--- a/be/src/udf/udf.cc
+++ b/be/src/udf/udf.cc
@@ -109,8 +109,11 @@ class RuntimeState {
return false;
}
- const std::string connected_user() const { return ""; }
- const std::string GetEffectiveUser() const { return ""; }
+ const std::string connected_user() const { return user_string_; }
+ const std::string GetEffectiveUser() const { return user_string_; }
+
+ private:
+ const std::string user_string_ = "";
};
}
diff --git a/be/src/util/parquet-reader.cc b/be/src/util/parquet-reader.cc
index e8b16cf14..3bdd488e2 100644
--- a/be/src/util/parquet-reader.cc
+++ b/be/src/util/parquet-reader.cc
@@ -222,6 +222,11 @@ int main(int argc, char** argv) {
FileMetaData file_metadata;
bool status = DeserializeThriftMsg(metadata, &metadata_len, true,
&file_metadata);
assert(status);
+ // This code fixes a warning about status being unused. DeserializeThriftMsg
already
+ // would have printed an error, so just return.
+ if (!status) {
+ return -1;
+ }
cerr << ThriftDebugString(file_metadata) << endl;
cerr << "Schema: " << endl << GetSchema(file_metadata) << endl;
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 1cbb46c9f..b4f2e5d85 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -77,16 +77,16 @@ export IMPALA_VERSION=4.2.0-SNAPSHOT
# moving to a different build of the toolchain, e.g. when a version is bumped
or a
# compile option is changed. The build id can be found in the output of the
toolchain
# build jobs, it is constructed from the build number and toolchain git hash
prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=207-21f1a0057a
+export IMPALA_TOOLCHAIN_BUILD_ID=215-9ec4f91d2d
# Versions of toolchain dependencies.
# -----------------------------------
export IMPALA_AVRO_VERSION=1.7.4-p5
unset IMPALA_AVRO_URL
-export IMPALA_BINUTILS_VERSION=2.28
+export IMPALA_BINUTILS_VERSION=2.35.1
unset IMPALA_BINUTILS_URL
export IMPALA_BOOST_VERSION=1.74.0-p1
unset IMPALA_BOOST_URL
-export IMPALA_BREAKPAD_VERSION=97a98836768f8f0154f8f86e5e14c2bb7e74132e-p2
+export IMPALA_BREAKPAD_VERSION=e09741c609dcd5f5274d40182c5e2cc9a002d5ba-p2
unset IMPALA_BREAKPAD_URL
export IMPALA_BZIP2_VERSION=1.0.8-p2
unset IMPALA_BZIP2_URL
@@ -94,17 +94,17 @@ export IMPALA_CCTZ_VERSION=2.2
unset IMPALA_CCTZ_URL
export IMPALA_CMAKE_VERSION=3.22.2
unset IMPALA_CMAKE_URL
-export IMPALA_CRCUTIL_VERSION=440ba7babeff77ffad992df3a10c767f184e946e-p2
+export IMPALA_CRCUTIL_VERSION=2903870057d2f1f109b245650be29e856dc8b646
unset IMPALA_CRCUTIL_URL
export IMPALA_CURL_VERSION=7.78.0
unset IMPALA_CURL_URL
export IMPALA_CYRUS_SASL_VERSION=2.1.23
unset IMPALA_CYRUS_SASL_URL
-export IMPALA_FLATBUFFERS_VERSION=1.6.0
+export IMPALA_FLATBUFFERS_VERSION=1.12.0
unset IMPALA_FLATBUFFERS_URL
-export IMPALA_GCC_VERSION=7.5.0
+export IMPALA_GCC_VERSION=10.4.0
unset IMPALA_GCC_URL
-export IMPALA_GDB_VERSION=7.9.1-p1
+export IMPALA_GDB_VERSION=12.1
unset IMPALA_GDB_URL
export IMPALA_GFLAGS_VERSION=2.2.0-p2
unset IMPALA_GFLAGS_URL
@@ -118,11 +118,11 @@ export IMPALA_JWT_CPP_VERSION=0.5.0
unset IMPALA_JWT_CPP_URL
export IMPALA_LIBEV_VERSION=4.20-p1
unset IMPALA_LIBEV_URL
-export IMPALA_LIBUNWIND_VERSION=1.3-rc1-p3
+export IMPALA_LIBUNWIND_VERSION=1.5.0-p1
unset IMPALA_LIBUNWIND_URL
-export IMPALA_LLVM_VERSION=5.0.1-p4
+export IMPALA_LLVM_VERSION=5.0.1-p5
unset IMPALA_LLVM_URL
-export IMPALA_LLVM_ASAN_VERSION=5.0.1-p4
+export IMPALA_LLVM_ASAN_VERSION=5.0.1-p5
unset IMPALA_LLVM_ASAN_URL
# Maximum memory available for mini-cluster and CDH cluster
@@ -135,7 +135,7 @@ export IMPALA_LLVM_UBSAN_BASE_VERSION=5.0.1
# Debug builds should use the release+asserts build to get additional coverage.
# Don't use the LLVM debug build because the binaries are too large to
distribute.
-export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts-p4
+export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts-p5
unset IMPALA_LLVM_DEBUG_URL
export IMPALA_LZ4_VERSION=1.9.3
unset IMPALA_LZ4_URL
@@ -164,7 +164,7 @@ unset IMPALA_SNAPPY_URL
export IMPALA_SQUEASEL_VERSION=3.3
unset IMPALA_SQUEASEL_URL
# TPC utilities used for test/benchmark data generation.
-export IMPALA_TPC_DS_VERSION=2.1.0
+export IMPALA_TPC_DS_VERSION=2.1.0-p1
unset IMPALA_TPC_DS_URL
export IMPALA_TPC_H_VERSION=2.17.0
unset IMPALA_TPC_H_URL
diff --git a/fe/pom.xml b/fe/pom.xml
index 8bb051c84..863f285f8 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -349,9 +349,9 @@ under the License.
</dependency>
<dependency>
- <groupId>com.github.davidmoten</groupId>
+ <groupId>com.google.flatbuffers</groupId>
<artifactId>flatbuffers-java</artifactId>
- <version>1.6.0.1</version>
+ <version>1.12.0</version>
</dependency>
<dependency>
diff --git a/infra/python/bootstrap_virtualenv.py
b/infra/python/bootstrap_virtualenv.py
index 36aee7e8d..d292e17cd 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -152,6 +152,11 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
current process's command line arguments are inherited.'''
if not env: env = dict(os.environ)
env["CC"] = cc
+ # Since gcc is now built with toolchain binutils which may be newer than the
+ # system binutils, we need to include the toolchain binutils on the PATH.
+ toolchain_binutils_dir = toolchain_pkg_dir("binutils")
+ binutils_bin_dir = os.path.join(toolchain_binutils_dir, "bin")
+ env["PATH"] = "{0}:{1}".format(binutils_bin_dir, env["PATH"])
# Parallelize the slow numpy build.
# Use getconf instead of nproc because it is supported more widely, e.g. on
older