This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 0dcd23412 ORC-1639: [C++] Simplify flags in CMake and fix
corresponding compiling errors
0dcd23412 is described below
commit 0dcd23412900a80df9250c2a10777149ff66d4a8
Author: Gang Wu <[email protected]>
AuthorDate: Tue Feb 27 21:00:03 2024 -0800
ORC-1639: [C++] Simplify flags in CMake and fix corresponding compiling
errors
### What changes were proposed in this pull request?
- Remove unnecessary compiler flags and replace -Weverything with -Wall.
- Fix warnings and errors that emerge after the flag change.
### Why are the changes needed?
The C++ library has added -Weverything which results in a bunch of -Wno-xxx
flags. We should reduce them as many as possible.
### How was this patch tested?
- Tested it locally with clang13, clang17 and gcc13.
- Make sure all CIs are passed.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #1826 from wgtmac/ORC-1639.
Authored-by: Gang Wu <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
CMakeLists.txt | 29 ++++-------------------------
c++/src/ColumnPrinter.cc | 2 +-
c++/src/ColumnReader.cc | 18 +++++++++---------
c++/src/ColumnWriter.cc | 8 ++++----
c++/src/ConvertColumnReader.cc | 12 ++++++------
c++/src/Murmur3.cc | 6 ++++++
c++/src/Timezone.cc | 2 +-
c++/src/TypeImpl.cc | 22 +++++++++++-----------
c++/test/TestDecompression.cc | 4 ++--
9 files changed, 44 insertions(+), 59 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7fc18cea1..1fb0e755d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -118,43 +118,22 @@ if (NOT MSVC)
endif ()
message(STATUS "compiler ${CMAKE_CXX_COMPILER_ID} version
${CMAKE_CXX_COMPILER_VERSION}")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR
- CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+ if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION
VERSION_LESS "5.0")
message(FATAL_ERROR "A c++17-compliant compiler is required, please use at
least Clang 5")
else ()
set (CXX17_FLAGS "-std=c++17")
endif ()
- set (WARN_FLAGS "-Weverything -Wno-c++98-compat -Wno-missing-prototypes")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++98-compat-pedantic -Wno-padded")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-covered-switch-default")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-missing-noreturn -Wno-unknown-pragmas")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-gnu-zero-variadic-macro-arguments")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-conversion")
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-unused-macros")
- endif()
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-reserved-identifier
-Wno-suggest-destructor-override -Wno-suggest-override")
- endif()
- if (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat
-Wno-unknown-warning-option -Wno-suggest-override
-Wno-suggest-destructor-override")
- elseif (CMAKE_HOST_APPLE AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER
"11.0")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-c++2a-compat")
- endif ()
+ set (WARN_FLAGS "-Wall -Wextra")
if (STOP_BUILD_ON_WARNING)
set (WARN_FLAGS "${WARN_FLAGS} -Werror")
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR
- CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0")
+ if (CMAKE_CXX_COMPILER_VERSION STREQUAL "" OR CMAKE_CXX_COMPILER_VERSION
VERSION_LESS "5.0")
message(FATAL_ERROR "A c++17-compliant compiler is required, please use at
least GCC 5")
else ()
set (CXX17_FLAGS "-std=c++17")
endif ()
- set (WARN_FLAGS "-Wall -Wno-unknown-pragmas -Wno-conversion")
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "12.0")
- set (WARN_FLAGS "${WARN_FLAGS} -Wno-array-bounds -Wno-stringop-overread")
# To compile protobuf in Fedora37
- endif ()
+ set (WARN_FLAGS "-Wall -Wextra")
if (STOP_BUILD_ON_WARNING)
set (WARN_FLAGS "${WARN_FLAGS} -Werror")
endif ()
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index 867f13a24..5297f8037 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -639,7 +639,7 @@ namespace orc {
if (i != 0) {
writeString(buffer, ", ");
}
- const auto numBuffer = std::to_string(static_cast<const
int>(start[rowId][i]) & 0xff);
+ const auto numBuffer =
std::to_string(static_cast<int>(start[rowId][i]) & 0xff);
writeString(buffer, numBuffer.c_str());
}
writeChar(buffer, ']');
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 515b0b09d..e9cd88260 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -269,8 +269,8 @@ namespace orc {
private:
std::unique_ptr<orc::RleDecoder> secondsRle;
std::unique_ptr<orc::RleDecoder> nanoRle;
- const Timezone& writerTimezone;
- const Timezone& readerTimezone;
+ const Timezone* writerTimezone;
+ const Timezone* readerTimezone;
const int64_t epochOffset;
const bool sameTimezone;
@@ -288,10 +288,10 @@ namespace orc {
TimestampColumnReader::TimestampColumnReader(const Type& type,
StripeStreams& stripe,
bool isInstantType)
: ColumnReader(type, stripe),
- writerTimezone(isInstantType ? getTimezoneByName("GMT") :
stripe.getWriterTimezone()),
- readerTimezone(isInstantType ? getTimezoneByName("GMT") :
stripe.getReaderTimezone()),
- epochOffset(writerTimezone.getEpoch()),
- sameTimezone(&writerTimezone == &readerTimezone) {
+ writerTimezone(isInstantType ? &getTimezoneByName("GMT") :
&stripe.getWriterTimezone()),
+ readerTimezone(isInstantType ? &getTimezoneByName("GMT") :
&stripe.getReaderTimezone()),
+ epochOffset(writerTimezone->getEpoch()),
+ sameTimezone(writerTimezone == readerTimezone) {
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
std::unique_ptr<SeekableInputStream> stream =
stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
@@ -336,13 +336,13 @@ namespace orc {
if (!sameTimezone) {
// adjust timestamp value to same wall clock time if writer and
reader
// time zones have different rules, which is required for Apache Orc.
- const auto& wv = writerTimezone.getVariant(writerTime);
- const auto& rv = readerTimezone.getVariant(writerTime);
+ const auto& wv = writerTimezone->getVariant(writerTime);
+ const auto& rv = readerTimezone->getVariant(writerTime);
if (!wv.hasSameTzRule(rv)) {
// If the timezone adjustment moves the millis across a DST
boundary,
// we need to reevaluate the offsets.
int64_t adjustedTime = writerTime + wv.gmtOffset - rv.gmtOffset;
- const auto& adjustedReader =
readerTimezone.getVariant(adjustedTime);
+ const auto& adjustedReader =
readerTimezone->getVariant(adjustedTime);
writerTime = writerTime + wv.gmtOffset - adjustedReader.gmtOffset;
}
}
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index 5c7ad2179..f24be1f0b 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -1643,7 +1643,7 @@ namespace orc {
private:
RleVersion rleVersion;
- const Timezone& timezone;
+ const Timezone* timezone;
const bool isUTC;
};
@@ -1651,7 +1651,7 @@ namespace orc {
const WriterOptions& options,
bool isInstantType)
: ColumnWriter(type, factory, options),
rleVersion(options.getRleVersion()),
- timezone(isInstantType ? getTimezoneByName("GMT") :
options.getTimezone()),
+ timezone(isInstantType ? &getTimezoneByName("GMT") :
&options.getTimezone()),
isUTC(isInstantType || options.getTimezoneName() == "GMT") {
std::unique_ptr<BufferedOutputStream> dataStream =
factory.createStream(proto::Stream_Kind_DATA);
@@ -1713,7 +1713,7 @@ namespace orc {
// TimestampVectorBatch already stores data in UTC
int64_t millsUTC = secs[i] * 1000 + nanos[i] / 1000000;
if (!isUTC) {
- millsUTC = timezone.convertToUTC(secs[i]) * 1000 + nanos[i] /
1000000;
+ millsUTC = timezone->convertToUTC(secs[i]) * 1000 + nanos[i] /
1000000;
}
++count;
if (enableBloomFilter) {
@@ -1725,7 +1725,7 @@ namespace orc {
secs[i] += 1;
}
- secs[i] -= timezone.getEpoch();
+ secs[i] -= timezone->getEpoch();
nanos[i] = formatNano(nanos[i]);
}
}
diff --git a/c++/src/ConvertColumnReader.cc b/c++/src/ConvertColumnReader.cc
index c139cfa9d..d94d1861a 100644
--- a/c++/src/ConvertColumnReader.cc
+++ b/c++/src/ConvertColumnReader.cc
@@ -399,14 +399,14 @@ namespace orc {
ConvertToTimestampColumnReader(const Type& _readType, const Type& fileType,
StripeStreams& stripe, bool
_throwOnOverflow)
: ConvertColumnReader(_readType, fileType, stripe, _throwOnOverflow),
- readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ?
getTimezoneByName("GMT")
- :
stripe.getReaderTimezone()),
- needConvertTimezone(&readerTimezone != &getTimezoneByName("GMT")) {}
+ readerTimezone(readType.getKind() == TIMESTAMP_INSTANT ?
&getTimezoneByName("GMT")
+ :
&stripe.getReaderTimezone()),
+ needConvertTimezone(readerTimezone != &getTimezoneByName("GMT")) {}
void next(ColumnVectorBatch& rowBatch, uint64_t numValues, char* notNull)
override;
protected:
- const orc::Timezone& readerTimezone;
+ const orc::Timezone* readerTimezone;
const bool needConvertTimezone;
};
@@ -462,7 +462,7 @@ namespace orc {
dstBatch.nanoseconds[idx] = 0;
}
if (needConvertTimezone) {
- dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]);
+ dstBatch.data[idx] = readerTimezone->convertFromUTC(dstBatch.data[idx]);
}
}
@@ -648,7 +648,7 @@ namespace orc {
dstBatch.nanoseconds[idx] = fractionPortion.toLong();
if (needConvertTimezone) {
- dstBatch.data[idx] = readerTimezone.convertFromUTC(dstBatch.data[idx]);
+ dstBatch.data[idx] =
readerTimezone->convertFromUTC(dstBatch.data[idx]);
}
}
diff --git a/c++/src/Murmur3.cc b/c++/src/Murmur3.cc
index 1e4f42f64..518e5e6de 100644
--- a/c++/src/Murmur3.cc
+++ b/c++/src/Murmur3.cc
@@ -69,16 +69,22 @@ namespace orc {
switch (len - idx) {
case 7:
k ^= static_cast<uint64_t>(data[idx + 6]) << 48;
+ [[fallthrough]];
case 6:
k ^= static_cast<uint64_t>(data[idx + 5]) << 40;
+ [[fallthrough]];
case 5:
k ^= static_cast<uint64_t>(data[idx + 4]) << 32;
+ [[fallthrough]];
case 4:
k ^= static_cast<uint64_t>(data[idx + 3]) << 24;
+ [[fallthrough]];
case 3:
k ^= static_cast<uint64_t>(data[idx + 2]) << 16;
+ [[fallthrough]];
case 2:
k ^= static_cast<uint64_t>(data[idx + 1]) << 8;
+ [[fallthrough]];
case 1:
k ^= static_cast<uint64_t>(data[idx + 0]);
diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc
index 9f1352e30..27e14480d 100644
--- a/c++/src/Timezone.cc
+++ b/c++/src/Timezone.cc
@@ -449,7 +449,7 @@ namespace orc {
std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) {
auto result = std::make_shared<FutureRuleImpl>();
FutureRuleParser parser(ruleString,
dynamic_cast<FutureRuleImpl*>(result.get()));
- return std::move(result);
+ return result;
}
std::string TimezoneVariant::toString() const {
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index cf8aa0ad7..c427a962b 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -337,7 +337,7 @@ namespace orc {
->createRowBatch(capacity, memoryPool, encoded,
useTightNumericVector)
.release());
}
- return std::move(result);
+ return result;
}
case LIST: {
@@ -346,7 +346,7 @@ namespace orc {
result->elements =
getSubtype(0)->createRowBatch(capacity, memoryPool, encoded,
useTightNumericVector);
}
- return std::move(result);
+ return result;
}
case MAP: {
@@ -359,7 +359,7 @@ namespace orc {
result->elements =
getSubtype(1)->createRowBatch(capacity, memoryPool, encoded,
useTightNumericVector);
}
- return std::move(result);
+ return result;
}
case DECIMAL: {
@@ -378,7 +378,7 @@ namespace orc {
->createRowBatch(capacity, memoryPool, encoded,
useTightNumericVector)
.release());
}
- return std::move(result);
+ return result;
}
default:
@@ -405,14 +405,14 @@ namespace orc {
std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) {
auto result = std::make_unique<TypeImpl>(LIST);
result->addChildType(std::move(elements));
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key,
std::unique_ptr<Type> value) {
auto result = std::make_unique<TypeImpl>(MAP);
result->addChildType(std::move(key));
result->addChildType(std::move(value));
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> createUnionType() {
@@ -567,7 +567,7 @@ namespace orc {
const auto& value = fileType->getAttributeValue(key);
result->setAttribute(key, value);
}
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> Type::buildTypeFromString(const std::string& input) {
@@ -590,7 +590,7 @@ namespace orc {
throw std::logic_error("Array type must contain exactly one sub type.");
}
result->addChildType(std::move(res.first));
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> TypeImpl::parseMapType(const std::string& input,
size_t start, size_t end) {
@@ -608,7 +608,7 @@ namespace orc {
}
result->addChildType(std::move(key.first));
result->addChildType(std::move(val.first));
- return std::move(result);
+ return result;
}
std::pair<std::string, size_t> TypeImpl::parseName(const std::string& input,
const size_t start,
@@ -671,7 +671,7 @@ namespace orc {
++pos;
}
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> TypeImpl::parseUnionType(const std::string& input,
size_t start,
@@ -691,7 +691,7 @@ namespace orc {
++pos;
}
- return std::move(result);
+ return result;
}
std::unique_ptr<Type> TypeImpl::parseDecimalType(const std::string& input,
size_t start,
diff --git a/c++/test/TestDecompression.cc b/c++/test/TestDecompression.cc
index c90801d72..e729fc8d8 100644
--- a/c++/test/TestDecompression.cc
+++ b/c++/test/TestDecompression.cc
@@ -352,7 +352,7 @@ namespace orc {
const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123";
ASSERT_EQ(strlen(expected), length);
for (uint64_t i = 0; i < length; ++i) {
- ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const
char*>(ptr)[i]);
+ ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const
char*>(ptr)[i]);
}
ASSERT_TRUE(!result->Next(&ptr, &length));
}
@@ -419,7 +419,7 @@ namespace orc {
const char* expected = "XXXXabcdabcdABCDABCDwxyzwzyz123";
ASSERT_EQ(strlen(expected), length);
for (uint64_t i = 0; i < length; ++i) {
- ASSERT_EQ(static_cast<const char>(expected[i]), static_cast<const
char*>(ptr)[i]);
+ ASSERT_EQ(static_cast<char>(expected[i]), static_cast<const
char*>(ptr)[i]);
}
ASSERT_TRUE(!result->Next(&ptr, &length));
}