This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 0dcd23412 ORC-1639: [C++] Simplify flags in CMake and fix 
corresponding compiling errors
0dcd23412 is described below

commit 0dcd23412900a80df9250c2a10777149ff66d4a8
Author: Gang Wu <ust...@gmail.com>
AuthorDate: Tue Feb 27 21:00:03 2024 -0800

    ORC-1639: [C++] Simplify flags in CMake and fix corresponding compiling 
errors
    
    ### What changes were proposed in this pull request?
    - Remove unnecessary compiler flags and replace -Weverything with -Wall.
    - Fix warnings and errors that emerge after the flag change.
    
    ### Why are the changes needed?
    The C++ library has added -Weverything which results in a bunch of -Wno-xxx 
flags. We should reduce them as many as possible.
    
    ### How was this patch tested?
    - Tested it locally with clang13, clang17 and gcc13.
    - Make sure all CIs are passed.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #1826 from wgtmac/ORC-1639.
    
    Authored-by: Gang Wu <ust...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 CMakeLists.txt                 | 29 ++++-------------------------
 c++/src/ColumnPrinter.cc       |  2 +-
 c++/src/ColumnReader.cc        | 18 +++++++++---------
 c++/src/ColumnWriter.cc        |  8 ++++----
 c++/src/ConvertColumnReader.cc | 12 ++++++------
 c++/src/Murmur3.cc             |  6 ++++++
 c++/src/Timezone.cc            |  2 +-
 c++/src/TypeImpl.cc            | 22 +++++++++++-----------
 c++/test/TestDecompression.cc  |  4 ++--
 9 files changed, 44 insertions(+), 59 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7fc18cea1..1fb0e755d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -118,43 +118,22 @@ if (NOT MSVC)
 endif ()
 message(STATUS "compiler ${CMAKE_CXX_COMPILER_ID} version 
${CMAKE_CXX_COMPILER_VERSION}")
 if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR
-      CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+  if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION 
VERSION_LESS "5.0")
     message(FATAL_ERROR "A c++17-compliant compiler is required, please use at 
least Clang 5")
   else ()
     set (CXX17_FLAGS "-std=c++17")
   endif ()
-  set (WARN_FLAGS "-Weverything -Wno-c++98-compat -Wno-missing-prototypes")
-  set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++98-compat-pedantic -Wno-padded")
-  set (WARN_FLAGS "${WARN_FLAGS} -Wno-covered-switch-default")
-  set (WARN_FLAGS "${WARN_FLAGS} -Wno-missing-noreturn -Wno-unknown-pragmas")
-  set (WARN_FLAGS "${WARN_FLAGS} -Wno-gnu-zero-variadic-macro-arguments")
-  set (WARN_FLAGS "${WARN_FLAGS} -Wno-conversion")
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0")
-    set (WARN_FLAGS "${WARN_FLAGS} -Wno-unused-macros")
-  endif()
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
-    set (WARN_FLAGS "${WARN_FLAGS} -Wno-reserved-identifier 
-Wno-suggest-destructor-override -Wno-suggest-override")
-  endif()
-  if (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0")
-    set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat 
-Wno-unknown-warning-option -Wno-suggest-override 
-Wno-suggest-destructor-override")
-  elseif (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 
"11.0")
-    set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat")
-  endif ()
+  set (WARN_FLAGS "-Wall -Wextra")
   if (STOP_BUILD_ON_WARNING)
     set (WARN_FLAGS "${WARN_FLAGS} -Werror")
   endif ()
 elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR
-      CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+  if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION 
VERSION_LESS "5.0")
     message(FATAL_ERROR "A c++17-compliant compiler is required, please use at 
least GCC 5")
   else ()
     set (CXX17_FLAGS "-std=c++17")
   endif ()
-  set (WARN_FLAGS "-Wall -Wno-unknown-pragmas -Wno-conversion")
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0")
-    set (WARN_FLAGS "${WARN_FLAGS} -Wno-array-bounds -Wno-stringop-overread") 
# To compile protobuf in Fedora37
-  endif ()
+  set (WARN_FLAGS "-Wall -Wextra")
   if (STOP_BUILD_ON_WARNING)
     set (WARN_FLAGS "${WARN_FLAGS} -Werror")
   endif ()
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index 867f13a24..5297f8037 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -639,7 +639,7 @@ namespace orc {
         if (i != 0) {
           writeString(buffer, ", ");
         }
-        const auto numBuffer = std::to_string(static_cast<const 
int>(start[rowId][i]) & 0xff);
+        const auto numBuffer = 
std::to_string(static_cast<int>(start[rowId][i]) & 0xff);
         writeString(buffer, numBuffer.c_str());
       }
       writeChar(buffer, ']');
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 515b0b09d..e9cd88260 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -269,8 +269,8 @@ namespace orc {
    private:
     std::unique_ptr<orc::RleDecoder> secondsRle;
     std::unique_ptr<orc::RleDecoder> nanoRle;
-    const Timezone& writerTimezone;
-    const Timezone& readerTimezone;
+    const Timezone* writerTimezone;
+    const Timezone* readerTimezone;
     const int64_t epochOffset;
     const bool sameTimezone;
 
@@ -288,10 +288,10 @@ namespace orc {
   TimestampColumnReader::TimestampColumnReader(const Type& type, 
StripeStreams& stripe,
                                                bool isInstantType)
       : ColumnReader(type, stripe),
-        writerTimezone(isInstantType ? getTimezoneByName("GMT") : 
stripe.getWriterTimezone()),
-        readerTimezone(isInstantType ? getTimezoneByName("GMT") : 
stripe.getReaderTimezone()),
-        epochOffset(writerTimezone.getEpoch()),
-        sameTimezone(&writerTimezone == &readerTimezone) {
+        writerTimezone(isInstantType ? &getTimezoneByName("GMT") : 
&stripe.getWriterTimezone()),
+        readerTimezone(isInstantType ? &getTimezoneByName("GMT") : 
&stripe.getReaderTimezone()),
+        epochOffset(writerTimezone->getEpoch()),
+        sameTimezone(writerTimezone == readerTimezone) {
     RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
     std::unique_ptr<SeekableInputStream> stream =
         stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
@@ -336,13 +336,13 @@ namespace orc {
         if (!sameTimezone) {
           // adjust timestamp value to same wall clock time if writer and 
reader
           // time zones have different rules, which is required for Apache Orc.
-          const auto& wv = writerTimezone.getVariant(writerTime);
-          const auto& rv = readerTimezone.getVariant(writerTime);
+          const auto& wv = writerTimezone->getVariant(writerTime);
+          const auto& rv = readerTimezone->getVariant(writerTime);
           if (!wv.hasSameTzRule(rv)) {
             // If the timezone adjustment moves the millis across a DST 
boundary,
             // we need to reevaluate the offsets.
             int64_t adjustedTime = writerTime + wv.gmtOffset - rv.gmtOffset;
-            const auto& adjustedReader = 
readerTimezone.getVariant(adjustedTime);
+            const auto& adjustedReader = 
readerTimezone->getVariant(adjustedTime);
             writerTime = writerTime + wv.gmtOffset - adjustedReader.gmtOffset;
           }
         }
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index 5c7ad2179..f24be1f0b 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -1643,7 +1643,7 @@ namespace orc {
 
    private:
     RleVersion rleVersion;
-    const Timezone& timezone;
+    const Timezone* timezone;
     const bool isUTC;
   };
 
@@ -1651,7 +1651,7 @@ namespace orc {
                                                const WriterOptions& options, 
bool isInstantType)
       : ColumnWriter(type, factory, options),
         rleVersion(options.getRleVersion()),
-        timezone(isInstantType ? getTimezoneByName("GMT") : 
options.getTimezone()),
+        timezone(isInstantType ? &getTimezoneByName("GMT") : 
&options.getTimezone()),
         isUTC(isInstantType || options.getTimezoneName() == "GMT") {
     std::unique_ptr<BufferedOutputStream> dataStream =
         factory.createStream(proto::Stream_Kind_DATA);
@@ -1713,7 +1713,7 @@ namespace orc {
         // TimestampVectorBatch already stores data in UTC
         int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000;
         if (!isUTC) {
-          millsUTC = timezone.convertToUTC(secs[i]) * 1000 + nanos[i] / 
1000000;
+          millsUTC = timezone->convertToUTC(secs[i]) * 1000 + nanos[i] / 
1000000;
         }
         ++count;
         if (enableBloomFilter) {
@@ -1725,7 +1725,7 @@ namespace orc {
           secs[i] += 1;
         }
 
-        secs[i] -= timezone.getEpoch();
+        secs[i] -= timezone->getEpoch();
         nanos[i] = formatNano(nanos[i]);
       }
     }
diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc
index c139cfa9d..d94d1861a 100644
--- a/c++/src/ConvertColumnReader.cc
+++ b/c++/src/ConvertColumnReader.cc
@@ -399,14 +399,14 @@ namespace orc {
     ConvertToTimestampColumnReader(const Type& _readType, const Type& fileType,
                                    StripeStreams& stripe, bool 
_throwOnOverflow)
         : ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow),
-          readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? 
getTimezoneByName("GMT")
-                                                                 : 
stripe.getReaderTimezone()),
-          needConvertTimezone(&readerTimezone != &getTimezoneByName("GMT")) {}
+          readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ? 
&getTimezoneByName("GMT")
+                                                                 : 
&stripe.getReaderTimezone()),
+          needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {}
 
     void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull) 
override;
 
    protected:
-    const orc::Timezone& readerTimezone;
+    const orc::Timezone* readerTimezone;
     const bool needConvertTimezone;
   };
 
@@ -462,7 +462,7 @@ namespace orc {
       dstBatch.nanoseconds[idx] = 0;
     }
     if (needConvertTimezone) {
-      dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]);
+      dstBatch.data[idx] = readerTimezone->convertFromUTC(dstBatch.data[idx]);
     }
   }
 
@@ -648,7 +648,7 @@ namespace orc {
       dstBatch.nanoseconds[idx] = fractionPortion.toLong();
 
       if (needConvertTimezone) {
-        dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]);
+        dstBatch.data[idx] = 
readerTimezone->convertFromUTC(dstBatch.data[idx]);
       }
     }
 
diff --git a/c++/src/Murmur3.cc b/c++/src/Murmur3.cc
index 1e4f42f64..518e5e6de 100644
--- a/c++/src/Murmur3.cc
+++ b/c++/src/Murmur3.cc
@@ -69,16 +69,22 @@ namespace orc {
     switch (len - idx) {
       case 7:
         k ^= static_cast<uint64_t>(data[idx + 6]) << 48;
+        [[fallthrough]];
       case 6:
         k ^= static_cast<uint64_t>(data[idx + 5]) << 40;
+        [[fallthrough]];
       case 5:
         k ^= static_cast<uint64_t>(data[idx + 4]) << 32;
+        [[fallthrough]];
       case 4:
         k ^= static_cast<uint64_t>(data[idx + 3]) << 24;
+        [[fallthrough]];
       case 3:
         k ^= static_cast<uint64_t>(data[idx + 2]) << 16;
+        [[fallthrough]];
       case 2:
         k ^= static_cast<uint64_t>(data[idx + 1]) << 8;
+        [[fallthrough]];
       case 1:
         k ^= static_cast<uint64_t>(data[idx + 0]);
 
diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc
index 9f1352e30..27e14480d 100644
--- a/c++/src/Timezone.cc
+++ b/c++/src/Timezone.cc
@@ -449,7 +449,7 @@ namespace orc {
   std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) {
     auto result = std::make_shared<FutureRuleImpl>();
     FutureRuleParser parser(ruleString, 
dynamic_cast<FutureRuleImpl*>(result.get()));
-    return std::move(result);
+    return result;
   }
 
   std::string TimezoneVariant::toString() const {
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index cf8aa0ad7..c427a962b 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -337,7 +337,7 @@ namespace orc {
                   ->createRowBatch(capacity, memoryPool, encoded, 
useTightNumericVector)
                   .release());
         }
-        return std::move(result);
+        return result;
       }
 
       case LIST: {
@@ -346,7 +346,7 @@ namespace orc {
           result->elements =
               getSubtype(0)->createRowBatch(capacity, memoryPool, encoded, 
useTightNumericVector);
         }
-        return std::move(result);
+        return result;
       }
 
       case MAP: {
@@ -359,7 +359,7 @@ namespace orc {
           result->elements =
               getSubtype(1)->createRowBatch(capacity, memoryPool, encoded, 
useTightNumericVector);
         }
-        return std::move(result);
+        return result;
       }
 
       case DECIMAL: {
@@ -378,7 +378,7 @@ namespace orc {
                   ->createRowBatch(capacity, memoryPool, encoded, 
useTightNumericVector)
                   .release());
         }
-        return std::move(result);
+        return result;
       }
 
       default:
@@ -405,14 +405,14 @@ namespace orc {
   std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) {
     auto result = std::make_unique<TypeImpl>(LIST);
     result->addChildType(std::move(elements));
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, 
std::unique_ptr<Type> value) {
     auto result = std::make_unique<TypeImpl>(MAP);
     result->addChildType(std::move(key));
     result->addChildType(std::move(value));
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> createUnionType() {
@@ -567,7 +567,7 @@ namespace orc {
       const auto& value = fileType->getAttributeValue(key);
       result->setAttribute(key, value);
     }
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> Type::buildTypeFromString(const std::string& input) {
@@ -590,7 +590,7 @@ namespace orc {
       throw std::logic_error("Array type must contain exactly one sub type.");
     }
     result->addChildType(std::move(res.first));
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> TypeImpl::parseMapType(const std::string& input, 
size_t start, size_t end) {
@@ -608,7 +608,7 @@ namespace orc {
     }
     result->addChildType(std::move(key.first));
     result->addChildType(std::move(val.first));
-    return std::move(result);
+    return result;
   }
 
   std::pair<std::string, size_t> TypeImpl::parseName(const std::string& input, 
const size_t start,
@@ -671,7 +671,7 @@ namespace orc {
       ++pos;
     }
 
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string& input, 
size_t start,
@@ -691,7 +691,7 @@ namespace orc {
       ++pos;
     }
 
-    return std::move(result);
+    return result;
   }
 
   std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string& input, 
size_t start,
diff --git a/c++/test/TestDecompression.cc b/c++/test/TestDecompression.cc
index c90801d72..e729fc8d8 100644
--- a/c++/test/TestDecompression.cc
+++ b/c++/test/TestDecompression.cc
@@ -352,7 +352,7 @@ namespace orc {
     const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123";
     ASSERT_EQ(strlen(expected), length);
     for (uint64_t i = 0; i < length; ++i) {
-      ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const 
char*>(ptr)[i]);
+      ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const 
char*>(ptr)[i]);
     }
     ASSERT_TRUE(!result->Next(&ptr, &length));
   }
@@ -419,7 +419,7 @@ namespace orc {
     const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123";
     ASSERT_EQ(strlen(expected), length);
     for (uint64_t i = 0; i < length; ++i) {
-      ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const 
char*>(ptr)[i]);
+      ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const 
char*>(ptr)[i]);
     }
     ASSERT_TRUE(!result->Next(&ptr, &length));
   }

Reply via email to