This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 545ead3ac ORC-1687: [C++] Enforce naming style
545ead3ac is described below
commit 545ead3ac0deb7ec9b1602e81df18e0c2864ea7a
Author: ffacs <ffacs@[email protected]>
AuthorDate: Wed Apr 17 18:05:43 2024 +0800
ORC-1687: [C++] Enforce naming style
### What changes were proposed in this pull request?
1. Add `make check-format`, `make check-clang-tidy`, `make format` and
`make fix-clang-tidy`
2. Add a GitHub Actions job that runs check-clang-tidy and check-format
3. Fix naming style errors in `c++/src/OrcHdfsFile.cc` and `c++/src/CpuInfoUtil.cc`
### Why are the changes needed?
To enforce the identifier naming style on the C++ side.
### How was this patch tested?
UT passed
### Was this patch authored or co-authored using generative AI tooling?
NO
Closes #1888 from ffacs/NamingStyle.
Lead-authored-by: ffacs <ffacs@[email protected]>
Co-authored-by: ffacs <[email protected]>
Co-authored-by: ffacs <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
.clang-tidy | 5 +-
.github/workflows/build_and_test.yml | 25 ++--
CMakeLists.txt | 3 +
c++/build-support/REAMD.md | 30 +++++
c++/build-support/run_clang_format.py | 132 +++++++++++++++++++
.../build-support/run_clang_tidy.py | 0
c++/src/CpuInfoUtil.cc | 141 ++++++++++-----------
c++/src/OrcHdfsFile.cc | 48 +++----
cmake_modules/CheckFormat.cmake | 111 ++++++++++++++++
9 files changed, 386 insertions(+), 109 deletions(-)
diff --git a/.clang-tidy b/.clang-tidy
index bd995bca5..b401f8948 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -21,13 +21,14 @@ Checks: "-*,
CheckOptions:
[
+ { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
+ { key: readability-identifier-naming.PrivateMemberCase, value:
"camelBack"},
{ key: readability-identifier-naming.PrivateMemberSuffix, value: "_" },
{ key: readability-identifier-naming.ProtectedMemberSuffix, value: "" },
{ key: readability-identifier-naming.PublicMemberSuffix, value: "" },
- { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
{ key: readability-identifier-naming.ParameterIgnoredRegexp, value:
"^[a-zA-Z]$" },
]
WarningsAsErrors: ''
-HeaderFilterRegex: '.*'
+HeaderFilterRegex: '(orc/c\+\+/|orc/tools)'
FormatStyle: none
\ No newline at end of file
diff --git a/.github/workflows/build_and_test.yml
b/.github/workflows/build_and_test.yml
index b0350193b..533ec62ce 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -169,19 +169,20 @@ jobs:
formatting-check:
name: "C++ format check"
- runs-on: ubuntu-20.04
- strategy:
- matrix:
- path:
- - 'c++'
- - 'tools'
+ runs-on: ubuntu-22.04
steps:
- - uses: actions/checkout@v3
- - name: Run clang-format style check for C++ code
- uses: jidicula/[email protected]
- with:
- clang-format-version: '13'
- check-path: ${{ matrix.path }}
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ - name: Run build
+ run: |
+ mkdir build
+ cd build
+ cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++
-DCMAKE_C_COMPILER=clang -DSTOP_BUILD_ON_WARNING=OFF
-DCMAKE_EXPORT_COMPILE_COMMANDS=1
+ make
+ - name: Check clang-tidy
+ run: cd build && make check-clang-tidy
+ - name: Check clang-format
+ run: cd build && make check-format
license-check:
name: "License Check"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index af5b9f406..63e144b93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(CPACK_PACKAGE_VERSION_MINOR "1")
SET(CPACK_PACKAGE_VERSION_PATCH "0-SNAPSHOT")
SET(ORC_VERSION
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}
"${PROJECT_SOURCE_DIR}/cmake_modules")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy.
option (BUILD_JAVA
"Include ORC Java library in the build process"
@@ -210,3 +211,5 @@ if (BUILD_CPP_TESTS)
)
endif ()
endif ()
+
+INCLUDE(CheckFormat)
\ No newline at end of file
diff --git a/c++/build-support/REAMD.md b/c++/build-support/REAMD.md
new file mode 100644
index 000000000..0ffad788e
--- /dev/null
+++ b/c++/build-support/REAMD.md
@@ -0,0 +1,30 @@
+# Build support
+
+The Python scripts in this folder provide capabilities for formatting and linting the C++ code.
+Make sure `clang-format-13`, `clang-tidy-13` and `clang-apply-replacements-13` are installed and that CMake can find them.
+We pin the tool versions because different versions of the tools may generate different results.
+
+## clang-format
+
+To use `run_clang_format.py`, run the following commands:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++
-DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1
+make check-format # Do checks only
+make format # This would apply suggested changes, take care!
+```
+
+## clang-tidy
+
+To use `run_clang_tidy.py`, run the following commands:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++
-DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1
+make -j`nproc` # Important
+make check-clang-tidy # Do checks only
+make fix-clang-tidy # This would apply suggested changes, take care!
+```
diff --git a/c++/build-support/run_clang_format.py
b/c++/build-support/run_clang_format.py
new file mode 100644
index 000000000..52d2e6b25
--- /dev/null
+++ b/c++/build-support/run_clang_format.py
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import codecs
+import difflib
+import fnmatch
+import os
+import subprocess
+import sys
+
+
+def check(arguments, source_dir):
+ formatted_filenames = []
+ error = False
+ for directory, subdirs, filenames in os.walk(source_dir):
+ fullpaths = (os.path.join(directory, filename)
+ for filename in filenames)
+ source_files = [x for x in fullpaths
+ if x.endswith(".hh") or x.endswith(".cc")]
+ formatted_filenames.extend(
+ # Filter out files that match the globs in the globs file
+ [filename for filename in source_files
+ if not any((fnmatch.fnmatch(filename, exclude_glob)
+ for exclude_glob in exclude_globs))])
+
+ if arguments.fix:
+ if not arguments.quiet:
+ # Print out each file on its own line, but run
+ # clang format once for all of the files
+ print("\n".join(map(lambda x: "Formatting {}".format(x),
+ formatted_filenames)))
+ subprocess.check_call([arguments.clang_format_binary,
+ "-i"] + formatted_filenames)
+ else:
+ for filename in formatted_filenames:
+ if not arguments.quiet:
+ print("Checking {}".format(filename))
+ #
+ # Due to some incompatibilities between Python 2 and
+ # Python 3, there are some specific actions we take here
+ # to make sure the difflib.unified_diff call works.
+ #
+ # In Python 2, the call to subprocess.check_output return
+ # a 'str' type. In Python 3, however, the call returns a
+ # 'bytes' type unless the 'encoding' argument is
+ # specified. Unfortunately, the 'encoding' argument is not
+ # in the Python 2 API. We could do an if/else here based
+ # on the version of Python we are running, but it's more
+ # straightforward to read the file in binary and do utf-8
+ # conversion. In Python 2, it's just converting string
+ # types to unicode types, whereas in Python 3 it's
+ # converting bytes types to utf-8 encoded str types. This
+ # approach ensures that the arguments to
+ # difflib.unified_diff are acceptable string types in both
+ # Python 2 and Python 3.
+ with open(filename, "rb") as reader:
+ # Run clang-format and capture its output
+ formatted = subprocess.check_output(
+ [arguments.clang_format_binary,
+ filename])
+ formatted = codecs.decode(formatted, "utf-8")
+ # Read the original file
+ original = codecs.decode(reader.read(), "utf-8")
+ # Run the equivalent of diff -u
+ diff = list(difflib.unified_diff(
+ original.splitlines(True),
+ formatted.splitlines(True),
+ fromfile=filename,
+ tofile="{} (after clang format)".format(
+ filename)))
+ if diff:
+ print("{} had clang-format style issues".format(filename))
+ # Print out the diff to stderr
+ error = True
+ sys.stderr.writelines(diff)
+ return error
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Runs clang format on all of the source "
+ "files. If --fix is specified, and compares the output "
+ "with the existing file, outputting a unifiied diff if "
+ "there are any necessary changes")
+ parser.add_argument("clang_format_binary",
+ help="Path to the clang-format binary")
+ parser.add_argument("--exclude_globs",
+ help="Filename containing globs for files "
+ "that should be excluded from the checks")
+ parser.add_argument("--source_dirs",
+ help="Comma-separated root directories of the code")
+ parser.add_argument("--fix", default=False,
+ action="store_true",
+ help="If specified, will re-format the source "
+ "code instead of comparing the re-formatted "
+ "output, defaults to %(default)s")
+ parser.add_argument("--quiet", default=False,
+ action="store_true",
+ help="If specified, only print errors")
+
+ args = parser.parse_args()
+
+ had_err = False
+ exclude_globs = []
+ if args.exclude_globs:
+ for line in open(args.exclude_globs):
+ if line.strip() == "":
+ continue
+ if line[0] == "#":
+ continue
+ exclude_globs.append(line.strip())
+
+ for source_dir in args.source_dirs.split(','):
+ if len(source_dir) > 0:
+ had_err = had_err or check(args, source_dir)
+
+ sys.exit(1 if had_err else 0)
\ No newline at end of file
diff --git a/run_clang_tidy.py b/c++/build-support/run_clang_tidy.py
old mode 100644
new mode 100755
similarity index 100%
rename from run_clang_tidy.py
rename to c++/build-support/run_clang_tidy.py
diff --git a/c++/src/CpuInfoUtil.cc b/c++/src/CpuInfoUtil.cc
index 82669de20..588f8dc96 100644
--- a/c++/src/CpuInfoUtil.cc
+++ b/c++/src/CpuInfoUtil.cc
@@ -74,7 +74,7 @@ namespace orc {
#if defined(_WIN32)
//------------------------------ WINDOWS ------------------------------//
- void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cacheSizes) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
DWORD buffer_size = 0;
@@ -108,8 +108,8 @@ namespace orc {
if (RelationCache == buffer_position->Relationship) {
PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
- const int64_t current = (*cache_sizes)[cache->Level - 1];
- (*cache_sizes)[cache->Level - 1] = std::max<int64_t>(current,
cache->Size);
+ const int64_t current = (*cacheSizes)[cache->Level - 1];
+ (*cacheSizes)[cache->Level - 1] = std::max<int64_t>(current,
cache->Size);
}
}
offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
@@ -136,23 +136,22 @@ namespace orc {
}
#endif // MINGW
- void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
+ void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+ std::string* modelName) {
int register_EAX_id = 1;
int highest_valid_id = 0;
int highest_extended_valid_id = 0;
std::bitset<32> features_ECX;
- std::array<int, 4> cpu_info;
+ std::array<int, 4> cpuInfo;
// Get highest valid id
- __cpuid(cpu_info.data(), 0);
- highest_valid_id = cpu_info[0];
+ __cpuid(cpuInfo.data(), 0);
+ highest_valid_id = cpuInfo[0];
// HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
// HEX of "AuthenticAMD": 41757468 656E7469 63414D44
- if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 &&
cpu_info[2] == 0x6c65746e) {
+ if (cpuInfo[1] == 0x756e6547 && cpuInfo[3] == 0x49656e69 && cpuInfo[2]
== 0x6c65746e) {
*vendor = CpuInfo::Vendor::Intel;
- } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
- cpu_info[2] == 0x444d4163) {
+ } else if (cpuInfo[1] == 0x68747541 && cpuInfo[3] == 0x69746e65 &&
cpuInfo[2] == 0x444d4163) {
*vendor = CpuInfo::Vendor::AMD;
}
@@ -161,19 +160,19 @@ namespace orc {
}
// EAX=1: Processor Info and Feature Bits
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- features_ECX = cpu_info[2];
+ __cpuidex(cpuInfo.data(), register_EAX_id, 0);
+ features_ECX = cpuInfo[2];
// Get highest extended id
- __cpuid(cpu_info.data(), 0x80000000);
- highest_extended_valid_id = cpu_info[0];
+ __cpuid(cpuInfo.data(), 0x80000000);
+ highest_extended_valid_id = cpuInfo[0];
// Retrieve CPU model name
if (highest_extended_valid_id >= static_cast<int>(0x80000004)) {
- model_name->clear();
+ modelName->clear();
for (int i = 0x80000002; i <= static_cast<int>(0x80000004); ++i) {
- __cpuidex(cpu_info.data(), i, 0);
- *model_name += std::string(reinterpret_cast<char*>(cpu_info.data()),
sizeof(cpu_info));
+ __cpuidex(cpuInfo.data(), i, 0);
+ *modelName += std::string(reinterpret_cast<char*>(cpuInfo.data()),
sizeof(cpuInfo));
}
}
@@ -184,37 +183,37 @@ namespace orc {
zmm_enabled = (xcr0 & 0xE0) == 0xE0;
}
- if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
- if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
- if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
- if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
- if (features_ECX[28]) *hardware_flags |= CpuInfo::AVX;
+ if (features_ECX[9]) *hardwareFlags |= CpuInfo::SSSE3;
+ if (features_ECX[19]) *hardwareFlags |= CpuInfo::SSE4_1;
+ if (features_ECX[20]) *hardwareFlags |= CpuInfo::SSE4_2;
+ if (features_ECX[23]) *hardwareFlags |= CpuInfo::POPCNT;
+ if (features_ECX[28]) *hardwareFlags |= CpuInfo::AVX;
// cpuid with EAX=7, ECX=0: Extended Features
register_EAX_id = 7;
if (highest_valid_id > register_EAX_id) {
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- std::bitset<32> features_EBX = cpu_info[1];
+ __cpuidex(cpuInfo.data(), register_EAX_id, 0);
+ std::bitset<32> features_EBX = cpuInfo[1];
- if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
- if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
- if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
+ if (features_EBX[3]) *hardwareFlags |= CpuInfo::BMI1;
+ if (features_EBX[5]) *hardwareFlags |= CpuInfo::AVX2;
+ if (features_EBX[8]) *hardwareFlags |= CpuInfo::BMI2;
if (zmm_enabled) {
- if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
- if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
- if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
- if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
- if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
+ if (features_EBX[16]) *hardwareFlags |= CpuInfo::AVX512F;
+ if (features_EBX[17]) *hardwareFlags |= CpuInfo::AVX512DQ;
+ if (features_EBX[28]) *hardwareFlags |= CpuInfo::AVX512CD;
+ if (features_EBX[30]) *hardwareFlags |= CpuInfo::AVX512BW;
+ if (features_EBX[31]) *hardwareFlags |= CpuInfo::AVX512VL;
}
}
}
#elif defined(CPUINFO_ARCH_ARM)
// Windows on Arm
- void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- *hardware_flags |= CpuInfo::ASIMD;
- // TODO: vendor, model_name
+ void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+ std::string* modelName) {
+ *hardwareFlags |= CpuInfo::ASIMD;
+ // TODO: vendor, modelName
}
#endif
@@ -236,25 +235,25 @@ namespace orc {
return std::nullopt;
}
- void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+ void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cacheSizes) {
static_assert(kCacheLevels >= 3, "");
auto c = IntegerSysCtlByName("hw.l1dcachesize");
if (c.has_value()) {
- (*cache_sizes)[0] = *c;
+ (*cacheSizes)[0] = *c;
}
c = IntegerSysCtlByName("hw.l2cachesize");
if (c.has_value()) {
- (*cache_sizes)[1] = *c;
+ (*cacheSizes)[1] = *c;
}
c = IntegerSysCtlByName("hw.l3cachesize");
if (c.has_value()) {
- (*cache_sizes)[2] = *c;
+ (*cacheSizes)[2] = *c;
}
}
- void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- // hardware_flags
+ void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+ std::string* modelName) {
+ // hardwareFlags
struct SysCtlCpuFeature {
const char* name;
int64_t flag;
@@ -280,13 +279,13 @@ namespace orc {
for (const auto& feature : features) {
auto v = IntegerSysCtlByName(feature.name);
if (v.value_or(0)) {
- *hardware_flags |= feature.flag;
+ *hardwareFlags |= feature.flag;
}
}
- // TODO: vendor, model_name
+ // TODO: vendor, modelName
*vendor = CpuInfo::Vendor::Unknown;
- *model_name = "Unknown";
+ *modelName = "Unknown";
}
#else
@@ -345,7 +344,7 @@ namespace orc {
const struct {
std::string name;
int64_t flag;
- } flag_mappings[] = {
+ } flagMappings[] = {
#if defined(CPUINFO_ARCH_X86)
{"ssse3", CpuInfo::SSSE3},
{"sse4_1", CpuInfo::SSE4_1},
@@ -364,12 +363,12 @@ namespace orc {
{"asimd", CpuInfo::ASIMD},
#endif
};
- const int64_t num_flags = sizeof(flag_mappings) /
sizeof(flag_mappings[0]);
+ const int64_t num_flags = sizeof(flagMappings) / sizeof(flagMappings[0]);
int64_t flags = 0;
for (int i = 0; i < num_flags; ++i) {
- if (values.find(flag_mappings[i].name) != std::string::npos) {
- flags |= flag_mappings[i].flag;
+ if (values.find(flagMappings[i].name) != std::string::npos) {
+ flags |= flagMappings[i].flag;
}
}
return flags;
@@ -469,9 +468,9 @@ namespace orc {
#elif defined(CPUINFO_ARCH_ARM)
//------------------------------ AARCH64 ------------------------------//
- bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
- if (simd_level == "NONE") {
- *hardware_flags &= ~CpuInfo::ASIMD;
+ bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t*
hardwareFlags) {
+ if (simdLevel == "NONE") {
+ *hardwareFlags &= ~CpuInfo::ASIMD;
return true;
}
return false;
@@ -485,7 +484,7 @@ namespace orc {
#else
//------------------------------ PPC, ... ------------------------------//
- bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
+ bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t*
hardwareFlags) {
return true;
}
@@ -496,17 +495,17 @@ namespace orc {
} // namespace
struct CpuInfo::Impl {
- int64_t hardware_flags = 0;
+ int64_t hardwareFlags = 0;
int numCores = 0;
- int64_t original_hardware_flags = 0;
+ int64_t originalHardwareFlags = 0;
Vendor vendor = Vendor::Unknown;
- std::string model_name = "Unknown";
- std::array<int64_t, kCacheLevels> cache_sizes{};
+ std::string modelName = "Unknown";
+ std::array<int64_t, kCacheLevels> cacheSizes{};
Impl() {
- OsRetrieveCacheSize(&cache_sizes);
- OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
- original_hardware_flags = hardware_flags;
+ OsRetrieveCacheSize(&cacheSizes);
+ OsRetrieveCpuInfo(&hardwareFlags, &vendor, &modelName);
+ originalHardwareFlags = hardwareFlags;
numCores =
std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
// parse user simd level
@@ -514,7 +513,7 @@ namespace orc {
std::string userSimdLevel = maybe_env_var == nullptr ? "NONE" :
std::string(maybe_env_var);
std::transform(userSimdLevel.begin(), userSimdLevel.end(),
userSimdLevel.begin(),
[](unsigned char c) { return std::toupper(c); });
- if (!ArchParseUserSimdLevel(userSimdLevel, &hardware_flags)) {
+ if (!ArchParseUserSimdLevel(userSimdLevel, &hardwareFlags)) {
throw ParseError("Invalid value for ORC_USER_SIMD_LEVEL: " +
userSimdLevel);
}
}
@@ -530,8 +529,8 @@ namespace orc {
#endif
const CpuInfo* CpuInfo::getInstance() {
- static CpuInfo cpu_info;
- return &cpu_info;
+ static CpuInfo cpuInfo;
+ return &cpuInfo;
}
#ifdef __clang__
@@ -539,7 +538,7 @@ namespace orc {
#endif
int64_t CpuInfo::hardwareFlags() const {
- return impl_->hardware_flags;
+ return impl_->hardwareFlags;
}
int CpuInfo::numCores() const {
@@ -551,7 +550,7 @@ namespace orc {
}
const std::string& CpuInfo::modelName() const {
- return impl_->model_name;
+ return impl_->modelName;
}
int64_t CpuInfo::cacheSize(CacheLevel level) const {
@@ -564,18 +563,18 @@ namespace orc {
static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
const int i = static_cast<int>(level);
- if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i];
+ if (impl_->cacheSizes[i] > 0) return impl_->cacheSizes[i];
if (i == 0) return kDefaultCacheSizes[0];
// l3 may be not available, return maximum of l2 or default size
- return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]);
+ return std::max(kDefaultCacheSizes[i], impl_->cacheSizes[i - 1]);
}
bool CpuInfo::isSupported(int64_t flags) const {
- return (impl_->hardware_flags & flags) == flags;
+ return (impl_->hardwareFlags & flags) == flags;
}
bool CpuInfo::isDetected(int64_t flags) const {
- return (impl_->original_hardware_flags & flags) == flags;
+ return (impl_->originalHardwareFlags & flags) == flags;
}
void CpuInfo::verifyCpuRequirements() const {
diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc
index 09ff71a0e..d878e276c 100644
--- a/c++/src/OrcHdfsFile.cc
+++ b/c++/src/OrcHdfsFile.cc
@@ -42,23 +42,23 @@ namespace orc {
class HdfsFileInputStream : public InputStream {
private:
- std::string filename;
- std::unique_ptr<hdfs::FileHandle> file;
- std::unique_ptr<hdfs::FileSystem> file_system;
- uint64_t totalLength;
- const uint64_t READ_SIZE = 1024 * 1024; // 1 MB
- ReaderMetrics* metrics;
+ std::string filename_;
+ std::unique_ptr<hdfs::FileHandle> file_;
+ std::unique_ptr<hdfs::FileSystem> fileSystem_;
+ uint64_t totalLength_;
+ const uint64_t readSize_ = 1024 * 1024; // 1 MB
+ ReaderMetrics* metrics_;
public:
- HdfsFileInputStream(std::string _filename, ReaderMetrics* _metrics) :
metrics(_metrics) {
- filename = _filename;
+ HdfsFileInputStream(std::string filename, ReaderMetrics* metrics) :
metrics_(metrics) {
+ filename_ = filename;
// Building a URI object from the given uri_path
hdfs::URI uri;
try {
- uri = hdfs::URI::parse_from_string(filename);
+ uri = hdfs::URI::parse_from_string(filename_);
} catch (const hdfs::uri_parse_error&) {
- throw ParseError("Malformed URI: " + filename);
+ throw ParseError("Malformed URI: " + filename_);
}
// This sets conf path to default "$HADOOP_CONF_DIR" or
"/etc/hadoop/conf"
@@ -82,9 +82,9 @@ namespace orc {
}
hdfs::IoService* io_service = hdfs::IoService::New();
// Wrapping file_system into a unique pointer to guarantee deletion
- file_system =
+ fileSystem_ =
std::unique_ptr<hdfs::FileSystem>(hdfs::FileSystem::New(io_service,
"", options));
- if (file_system.get() == nullptr) {
+ if (fileSystem_.get() == nullptr) {
throw ParseError("Can't create FileSystem object. ");
}
hdfs::Status status;
@@ -92,13 +92,13 @@ namespace orc {
if (!uri.get_host().empty()) {
// Using port if supplied, otherwise using "" to look up port in
configs
std::string port = uri.has_port() ? std::to_string(uri.get_port()) :
"";
- status = file_system->Connect(uri.get_host(), port);
+ status = fileSystem_->Connect(uri.get_host(), port);
if (!status.ok()) {
throw ParseError("Can't connect to " + uri.get_host() + ":" + port +
". " +
status.ToString());
}
} else {
- status = file_system->ConnectToDefaultFs();
+ status = fileSystem_->ConnectToDefaultFs();
if (!status.ok()) {
if (!options.defaultFS.get_host().empty()) {
throw ParseError("Error connecting to " + options.defaultFS.str()
+ ". " +
@@ -110,32 +110,32 @@ namespace orc {
}
}
- if (file_system.get() == nullptr) {
+ if (fileSystem_.get() == nullptr) {
throw ParseError("Can't connect the file system. ");
}
hdfs::FileHandle* file_raw = nullptr;
- status = file_system->Open(uri.get_path(true), &file_raw);
+ status = fileSystem_->Open(uri.get_path(true), &file_raw);
if (!status.ok()) {
throw ParseError("Can't open " + uri.get_path(true) + ". " +
status.ToString());
}
// Wrapping file_raw into a unique pointer to guarantee deletion
- file.reset(file_raw);
+ file_.reset(file_raw);
hdfs::StatInfo stat_info;
- status = file_system->GetFileInfo(uri.get_path(true), stat_info);
+ status = fileSystem_->GetFileInfo(uri.get_path(true), stat_info);
if (!status.ok()) {
throw ParseError("Can't stat " + uri.get_path(true) + ". " +
status.ToString());
}
- totalLength = stat_info.length;
+ totalLength_ = stat_info.length;
}
uint64_t getLength() const override {
- return totalLength;
+ return totalLength_;
}
uint64_t getNaturalReadSize() const override {
- return READ_SIZE;
+ return readSize_;
}
void read(void* buf, uint64_t length, uint64_t offset) override {
@@ -151,8 +151,8 @@ namespace orc {
do {
status =
- file->PositionRead(buf_ptr, static_cast<size_t>(length) -
total_bytes_read,
- static_cast<off_t>(offset + total_bytes_read),
&last_bytes_read);
+ file_->PositionRead(buf_ptr, static_cast<size_t>(length) -
total_bytes_read,
+ static_cast<off_t>(offset + total_bytes_read),
&last_bytes_read);
if (!status.ok()) {
throw ParseError("Error reading the file: " + status.ToString());
}
@@ -162,7 +162,7 @@ namespace orc {
}
const std::string& getName() const override {
- return filename;
+ return filename_;
}
~HdfsFileInputStream() override;
diff --git a/cmake_modules/CheckFormat.cmake b/cmake_modules/CheckFormat.cmake
new file mode 100644
index 000000000..1aff5d765
--- /dev/null
+++ b/cmake_modules/CheckFormat.cmake
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Locate the interpreter and the LLVM tools used by the lint/format targets.
+#
+# Python lookup preferences:
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3)
+set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}")
+
+# Anchor on PROJECT_SOURCE_DIR (as the top-level CMakeLists.txt does for
+# cmake_modules) rather than CMAKE_SOURCE_DIR, so the scripts are still found
+# when this project is built as a subdirectory of another one.
+set(BUILD_SUPPORT_DIR "${PROJECT_SOURCE_DIR}/c++/build-support")
+
+# Only the version-suffixed binaries are searched for; CLANG_SEARCH_PATH may
+# be set by the caller to hint at their location.
+find_program(CLANG_FORMAT_BIN
+  NAMES clang-format-13
+  HINTS ${CLANG_SEARCH_PATH})
+
+find_program(CLANG_TIDY_BIN
+  NAMES clang-tidy-13
+  HINTS ${CLANG_SEARCH_PATH})
+
+find_program(CLANG_APPLY_REPLACEMENTS_BIN
+  NAMES clang-apply-replacements-13
+  HINTS ${CLANG_SEARCH_PATH})
+
+
+# Report which tools were found. A find_program() result is tested with the
+# plain truth test: it is false for "<VAR>-NOTFOUND" and also for an empty
+# value, which a literal string comparison would report as "found".
+if(CLANG_FORMAT_BIN)
+  message(STATUS "Found clang-format at ${CLANG_FORMAT_BIN}")
+else()
+  message(WARNING "Couldn't find clang-format.")
+endif()
+
+if(CLANG_TIDY_BIN)
+  # Output compile_commands.json
+  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+  message(STATUS "Found clang-tidy at ${CLANG_TIDY_BIN}")
+else()
+  message(WARNING "Couldn't find clang-tidy.")
+endif()
+
+if(CLANG_APPLY_REPLACEMENTS_BIN)
+  # Output compile_commands.json
+  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+  message(STATUS "Found clang-apply-replacements at ${CLANG_APPLY_REPLACEMENTS_BIN}")
+else()
+  message(WARNING "Couldn't find clang-apply-replacements.")
+endif()
+
+# Default location of the lint exclusions file, used unless the caller has
+# already provided LINT_EXCLUSIONS_FILE. Each line of that file is a glob;
+# source files matching one of them are meant to be excluded from linting
+# (cpplint, clang-tidy, clang-format).
+if(NOT LINT_EXCLUSIONS_FILE)
+  set(LINT_EXCLUSIONS_FILE "${BUILD_SUPPORT_DIR}/lint_exclusions.txt")
+endif()
+
+# check-clang-tidy: runs clang-tidy and exits with a non-zero exit code if
+# any errors are found.
+# Note that clang-tidy automatically looks for a .clang-tidy file in parent
+# directories.
+# VERBATIM keeps the argument escaping identical on every platform.
+add_custom_target(check-clang-tidy
+  COMMAND
+    ${PYTHON_EXECUTABLE}
+    ${BUILD_SUPPORT_DIR}/run_clang_tidy.py  # run LLVM's clang-tidy script
+    -clang-tidy-binary ${CLANG_TIDY_BIN}    # using our clang-tidy binary
+    -p ${CMAKE_BINARY_DIR}                  # using cmake's generated compile commands
+  COMMENT "Running clang-tidy checks"
+  VERBATIM
+)
+
+# fix-clang-tidy: same invocation as check-clang-tidy, but additionally
+# applies the changes suggested by clang-tidy to the sources.
+add_custom_target(fix-clang-tidy
+  COMMAND
+    ${PYTHON_EXECUTABLE}
+    ${BUILD_SUPPORT_DIR}/run_clang_tidy.py  # run LLVM's clang-tidy script
+    -clang-tidy-binary ${CLANG_TIDY_BIN}    # using our clang-tidy binary
+    -p ${CMAKE_BINARY_DIR}                  # using cmake's generated compile commands
+    # using our clang-apply-replacements binary
+    -clang-apply-replacements-binary ${CLANG_APPLY_REPLACEMENTS_BIN}
+    -fix                                    # apply suggested changes generated by clang-tidy
+  COMMENT "Applying clang-tidy fixes"
+  VERBATIM
+)
+
+# Comma-separated list of the directories handed to run_clang_format.py.
+# PROJECT_SOURCE_DIR is used so the paths stay correct when this project is
+# included as a subdirectory of another build.
+set(ORC_FORMAT_DIRS "${PROJECT_SOURCE_DIR}/c++,${PROJECT_SOURCE_DIR}/tools")
+
+# Forward the lint exclusions file to the script, but only when it actually
+# exists: the script opens the file unconditionally once the option is given.
+set(ORC_FORMAT_EXCLUDE_ARGS "")
+if(LINT_EXCLUSIONS_FILE AND EXISTS "${LINT_EXCLUSIONS_FILE}")
+  set(ORC_FORMAT_EXCLUDE_ARGS --exclude_globs "${LINT_EXCLUSIONS_FILE}")
+endif()
+
+# format: re-formats the sources in place with clang-format.
+add_custom_target(format
+  COMMAND
+    ${PYTHON_EXECUTABLE}
+    ${BUILD_SUPPORT_DIR}/run_clang_format.py
+    ${CLANG_FORMAT_BIN}
+    --source_dirs
+    ${ORC_FORMAT_DIRS}
+    ${ORC_FORMAT_EXCLUDE_ARGS}
+    --fix
+  COMMENT "Re-formatting sources with clang-format"
+  VERBATIM
+)
+
+# check-format: runs clang-format and exits with a non-zero exit code if any
+# files need to be reformatted.
+add_custom_target(check-format
+  COMMAND
+    ${PYTHON_EXECUTABLE}
+    ${BUILD_SUPPORT_DIR}/run_clang_format.py
+    ${CLANG_FORMAT_BIN}
+    --source_dirs
+    ${ORC_FORMAT_DIRS}
+    ${ORC_FORMAT_EXCLUDE_ARGS}
+  COMMENT "Checking clang-format style"
+  VERBATIM
+)
\ No newline at end of file