This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push: new 0dcd23412 ORC-1639: [C++] Simplify flags in CMake and fix corresponding compiling errors 0dcd23412 is described below commit 0dcd23412900a80df9250c2a10777149ff66d4a8 Author: Gang Wu <ust...@gmail.com> AuthorDate: Tue Feb 27 21:00:03 2024 -0800 ORC-1639: [C++] Simplify flags in CMake and fix corresponding compiling errors ### What changes were proposed in this pull request? - Remove unnecessary compiler flags and replace -Weverything with -Wall. - Fix warnings and errors that emerge after the flag change. ### Why are the changes needed? The C++ library has added -Weverything which results in a bunch of -Wno-xxx flags. We should reduce them as many as possible. ### How was this patch tested? - Tested it locally with clang13, clang17 and gcc13. - Make sure all CIs are passed. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #1826 from wgtmac/ORC-1639. Authored-by: Gang Wu <ust...@gmail.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- CMakeLists.txt | 29 ++++------------------------- c++/src/ColumnPrinter.cc | 2 +- c++/src/ColumnReader.cc | 18 +++++++++--------- c++/src/ColumnWriter.cc | 8 ++++---- c++/src/ConvertColumnReader.cc | 12 ++++++------ c++/src/Murmur3.cc | 6 ++++++ c++/src/Timezone.cc | 2 +- c++/src/TypeImpl.cc | 22 +++++++++++----------- c++/test/TestDecompression.cc | 4 ++-- 9 files changed, 44 insertions(+), 59 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fc18cea1..1fb0e755d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,43 +118,22 @@ if (NOT MSVC) endif () message(STATUS "compiler ${CMAKE_CXX_COMPILER_ID} version ${CMAKE_CXX_COMPILER_VERSION}") if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR - CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") + if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "A c++17-compliant compiler is required, please use at least Clang 5") else () set (CXX17_FLAGS "-std=c++17") endif () - set (WARN_FLAGS "-Weverything -Wno-c++98-compat -Wno-missing-prototypes") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++98-compat-pedantic -Wno-padded") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-covered-switch-default") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-missing-noreturn -Wno-unknown-pragmas") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-gnu-zero-variadic-macro-arguments") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-conversion") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-unused-macros") - endif() - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-reserved-identifier -Wno-suggest-destructor-override -Wno-suggest-override") - endif() - if (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat -Wno-unknown-warning-option -Wno-suggest-override -Wno-suggest-destructor-override") - elseif (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "11.0") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat") - endif () + set (WARN_FLAGS "-Wall -Wextra") if (STOP_BUILD_ON_WARNING) set (WARN_FLAGS "${WARN_FLAGS} -Werror") endif () elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR - CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") + if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") message(FATAL_ERROR "A c++17-compliant compiler is required, please use at least GCC 5") else () set (CXX17_FLAGS "-std=c++17") endif () - set (WARN_FLAGS "-Wall -Wno-unknown-pragmas -Wno-conversion") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0") - set (WARN_FLAGS "${WARN_FLAGS} -Wno-array-bounds -Wno-stringop-overread") # To compile protobuf in Fedora37 - endif () + set (WARN_FLAGS "-Wall -Wextra") if (STOP_BUILD_ON_WARNING) set (WARN_FLAGS "${WARN_FLAGS} -Werror") endif () diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc index 867f13a24..5297f8037 100644 --- a/c++/src/ColumnPrinter.cc +++ b/c++/src/ColumnPrinter.cc @@ -639,7 +639,7 @@ namespace orc { if (i != 0) { writeString(buffer, ", "); } - const auto numBuffer = std::to_string(static_cast<const int>(start[rowId][i]) & 0xff); + const auto numBuffer = std::to_string(static_cast<int>(start[rowId][i]) & 0xff); writeString(buffer, numBuffer.c_str()); } writeChar(buffer, ']'); diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index 515b0b09d..e9cd88260 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -269,8 +269,8 @@ namespace orc { private: std::unique_ptr<orc::RleDecoder> secondsRle; std::unique_ptr<orc::RleDecoder> nanoRle; - const Timezone& writerTimezone; - const Timezone& readerTimezone; + const Timezone* writerTimezone; + const Timezone* readerTimezone; const int64_t epochOffset; const bool sameTimezone; @@ -288,10 +288,10 @@ namespace orc { TimestampColumnReader::TimestampColumnReader(const Type& type, StripeStreams& stripe, bool isInstantType) : ColumnReader(type, stripe), - writerTimezone(isInstantType ? getTimezoneByName("GMT") : stripe.getWriterTimezone()), - readerTimezone(isInstantType ? getTimezoneByName("GMT") : stripe.getReaderTimezone()), - epochOffset(writerTimezone.getEpoch()), - sameTimezone(&writerTimezone == &readerTimezone) { + writerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getWriterTimezone()), + readerTimezone(isInstantType ? &getTimezoneByName("GMT") : &stripe.getReaderTimezone()), + epochOffset(writerTimezone->getEpoch()), + sameTimezone(writerTimezone == readerTimezone) { RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind()); std::unique_ptr<SeekableInputStream> stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); @@ -336,13 +336,13 @@ namespace orc { if (!sameTimezone) { // adjust timestamp value to same wall clock time if writer and reader // time zones have different rules, which is required for Apache Orc. - const auto& wv = writerTimezone.getVariant(writerTime); - const auto& rv = readerTimezone.getVariant(writerTime); + const auto& wv = writerTimezone->getVariant(writerTime); + const auto& rv = readerTimezone->getVariant(writerTime); if (!wv.hasSameTzRule(rv)) { // If the timezone adjustment moves the millis across a DST boundary, // we need to reevaluate the offsets. int64_t adjustedTime = writerTime + wv.gmtOffset - rv.gmtOffset; - const auto& adjustedReader = readerTimezone.getVariant(adjustedTime); + const auto& adjustedReader = readerTimezone->getVariant(adjustedTime); writerTime = writerTime + wv.gmtOffset - adjustedReader.gmtOffset; } } diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc index 5c7ad2179..f24be1f0b 100644 --- a/c++/src/ColumnWriter.cc +++ b/c++/src/ColumnWriter.cc @@ -1643,7 +1643,7 @@ namespace orc { private: RleVersion rleVersion; - const Timezone& timezone; + const Timezone* timezone; const bool isUTC; }; @@ -1651,7 +1651,7 @@ namespace orc { const WriterOptions& options, bool isInstantType) : ColumnWriter(type, factory, options), rleVersion(options.getRleVersion()), - timezone(isInstantType ? getTimezoneByName("GMT") : options.getTimezone()), + timezone(isInstantType ? &getTimezoneByName("GMT") : &options.getTimezone()), isUTC(isInstantType || options.getTimezoneName() == "GMT") { std::unique_ptr<BufferedOutputStream> dataStream = factory.createStream(proto::Stream_Kind_DATA); @@ -1713,7 +1713,7 @@ namespace orc { // TimestampVectorBatch already stores data in UTC int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000; if (!isUTC) { - millsUTC = timezone.convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000; + millsUTC = timezone->convertToUTC(secs[i]) * 1000 + nanos[i] / 1000000; } ++count; if (enableBloomFilter) { @@ -1725,7 +1725,7 @@ namespace orc { secs[i] += 1; } - secs[i] -= timezone.getEpoch(); + secs[i] -= timezone->getEpoch(); nanos[i] = formatNano(nanos[i]); } } diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc index c139cfa9d..d94d1861a 100644 --- a/c++/src/ConvertColumnReader.cc +++ b/c++/src/ConvertColumnReader.cc @@ -399,14 +399,14 @@ namespace orc { ConvertToTimestampColumnReader(const Type& _readType, const Type& fileType, StripeStreams& stripe, bool _throwOnOverflow) : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow), - readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? getTimezoneByName("GMT") - : stripe.getReaderTimezone()), - needConvertTimezone(&readerTimezone != &getTimezoneByName("GMT")) {} + readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? &getTimezoneByName("GMT") + : &stripe.getReaderTimezone()), + needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {} void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) override; protected: - const orc::Timezone& readerTimezone; + const orc::Timezone* readerTimezone; const bool needConvertTimezone; }; @@ -462,7 +462,7 @@ namespace orc { dstBatch.nanoseconds[idx] = 0; } if (needConvertTimezone) { - dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]); + dstBatch.data[idx] = readerTimezone->convertFromUTC(dstBatch.data[idx]); } } @@ -648,7 +648,7 @@ namespace orc { dstBatch.nanoseconds[idx] = fractionPortion.toLong(); if (needConvertTimezone) { - dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]); + dstBatch.data[idx] = readerTimezone->convertFromUTC(dstBatch.data[idx]); } } diff --git a/c++/src/Murmur3.cc b/c++/src/Murmur3.cc index 1e4f42f64..518e5e6de 100644 --- a/c++/src/Murmur3.cc +++ b/c++/src/Murmur3.cc @@ -69,16 +69,22 @@ namespace orc { switch (len - idx) { case 7: k ^= static_cast<uint64_t>(data[idx + 6]) << 48; + [[fallthrough]]; case 6: k ^= static_cast<uint64_t>(data[idx + 5]) << 40; + [[fallthrough]]; case 5: k ^= static_cast<uint64_t>(data[idx + 4]) << 32; + [[fallthrough]]; case 4: k ^= static_cast<uint64_t>(data[idx + 3]) << 24; + [[fallthrough]]; case 3: k ^= static_cast<uint64_t>(data[idx + 2]) << 16; + [[fallthrough]]; case 2: k ^= static_cast<uint64_t>(data[idx + 1]) << 8; + [[fallthrough]]; case 1: k ^= static_cast<uint64_t>(data[idx + 0]); diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc index 9f1352e30..27e14480d 100644 --- a/c++/src/Timezone.cc +++ b/c++/src/Timezone.cc @@ -449,7 +449,7 @@ namespace orc { std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) { auto result = std::make_shared<FutureRuleImpl>(); FutureRuleParser parser(ruleString, dynamic_cast<FutureRuleImpl*>(result.get())); - return std::move(result); + return result; } std::string TimezoneVariant::toString() const { diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index cf8aa0ad7..c427a962b 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -337,7 +337,7 @@ namespace orc { ->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector) .release()); } - return std::move(result); + return result; } case LIST: { @@ -346,7 +346,7 @@ namespace orc { result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector); } - return std::move(result); + return result; } case MAP: { @@ -359,7 +359,7 @@ namespace orc { result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector); } - return std::move(result); + return result; } case DECIMAL: { @@ -378,7 +378,7 @@ namespace orc { ->createRowBatch(capacity, memoryPool, encoded, useTightNumericVector) .release()); } - return std::move(result); + return result; } default: @@ -405,14 +405,14 @@ namespace orc { std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) { auto result = std::make_unique<TypeImpl>(LIST); result->addChildType(std::move(elements)); - return std::move(result); + return result; } std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, std::unique_ptr<Type> value) { auto result = std::make_unique<TypeImpl>(MAP); result->addChildType(std::move(key)); result->addChildType(std::move(value)); - return std::move(result); + return result; } std::unique_ptr<Type> createUnionType() { @@ -567,7 +567,7 @@ namespace orc { const auto& value = fileType->getAttributeValue(key); result->setAttribute(key, value); } - return std::move(result); + return result; } std::unique_ptr<Type> Type::buildTypeFromString(const std::string& input) { @@ -590,7 +590,7 @@ namespace orc { throw std::logic_error("Array type must contain exactly one sub type."); } result->addChildType(std::move(res.first)); - return std::move(result); + return result; } std::unique_ptr<Type> TypeImpl::parseMapType(const std::string& input, size_t start, size_t end) { @@ -608,7 +608,7 @@ namespace orc { } result->addChildType(std::move(key.first)); result->addChildType(std::move(val.first)); - return std::move(result); + return result; } std::pair<std::string, size_t> TypeImpl::parseName(const std::string& input, const size_t start, @@ -671,7 +671,7 @@ namespace orc { ++pos; } - return std::move(result); + return result; } std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string& input, size_t start, @@ -691,7 +691,7 @@ namespace orc { ++pos; } - return std::move(result); + return result; } std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string& input, size_t start, diff --git a/c++/test/TestDecompression.cc b/c++/test/TestDecompression.cc index c90801d72..e729fc8d8 100644 --- a/c++/test/TestDecompression.cc +++ b/c++/test/TestDecompression.cc @@ -352,7 +352,7 @@ namespace orc { const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123"; ASSERT_EQ(strlen(expected), length); for (uint64_t i = 0; i < length; ++i) { - ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const char*>(ptr)[i]); + ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const char*>(ptr)[i]); } ASSERT_TRUE(!result->Next(&ptr, &length)); } @@ -419,7 +419,7 @@ namespace orc { const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123"; ASSERT_EQ(strlen(expected), length); for (uint64_t i = 0; i < length; ++i) { - ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const char*>(ptr)[i]); + ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const char*>(ptr)[i]); } ASSERT_TRUE(!result->Next(&ptr, &length)); }