This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 545ead3ac ORC-1687: [C++] Enforce naming style
545ead3ac is described below

commit 545ead3ac0deb7ec9b1602e81df18e0c2864ea7a
Author: ffacs <ffacs@[email protected]>
AuthorDate: Wed Apr 17 18:05:43 2024 +0800

    ORC-1687: [C++] Enforce naming style
    
    ### What changes were proposed in this pull request?
    1. Add `make check-format`, `make check-clang-tidy`, `make format` and
       `make fix-clang-tidy`
    2. Add a GitHub action that runs `check-clang-tidy`
    3. Fix naming style errors in `c++/src/CpuInfoUtil.cc` and `c++/src/OrcHdfsFile.cc`
    
    ### Why are the changes needed?
    To enforce the identifier naming style on the C++ side.
    
    ### How was this patch tested?
    UT passed
    
    ### Was this patch authored or co-authored using generative AI tooling?
    NO
    
    Closes #1888 from ffacs/NamingStyle.
    
    Lead-authored-by: ffacs <ffacs@[email protected]>
    Co-authored-by: ffacs <[email protected]>
    Co-authored-by: ffacs <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 .clang-tidy                                        |   5 +-
 .github/workflows/build_and_test.yml               |  25 ++--
 CMakeLists.txt                                     |   3 +
 c++/build-support/REAMD.md                         |  30 +++++
 c++/build-support/run_clang_format.py              | 132 +++++++++++++++++++
 .../build-support/run_clang_tidy.py                |   0
 c++/src/CpuInfoUtil.cc                             | 141 ++++++++++-----------
 c++/src/OrcHdfsFile.cc                             |  48 +++----
 cmake_modules/CheckFormat.cmake                    | 111 ++++++++++++++++
 9 files changed, 386 insertions(+), 109 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index bd995bca5..b401f8948 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -21,13 +21,14 @@ Checks: "-*,
 
 CheckOptions:
   [
+    { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
+    { key: readability-identifier-naming.PrivateMemberCase, value: 
"camelBack"},
     { key: readability-identifier-naming.PrivateMemberSuffix, value: "_" },
     { key: readability-identifier-naming.ProtectedMemberSuffix, value: "" },
     { key: readability-identifier-naming.PublicMemberSuffix, value: "" },
-    { key: readability-identifier-naming.ParameterCase, value: "camelBack" },
     { key: readability-identifier-naming.ParameterIgnoredRegexp, value: 
"^[a-zA-Z]$" },
   ]
 
 WarningsAsErrors: ''
-HeaderFilterRegex: '.*'
+HeaderFilterRegex: '(orc/c\+\+/|orc/tools)'
 FormatStyle: none
\ No newline at end of file
diff --git a/.github/workflows/build_and_test.yml 
b/.github/workflows/build_and_test.yml
index b0350193b..533ec62ce 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -169,19 +169,20 @@ jobs:
 
   formatting-check:
     name: "C++ format check"
-    runs-on: ubuntu-20.04
-    strategy:
-      matrix:
-        path:
-          - 'c++'
-          - 'tools'
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v3
-    - name: Run clang-format style check for C++ code
-      uses: jidicula/[email protected]
-      with:
-        clang-format-version: '13'
-        check-path: ${{ matrix.path }}
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Run build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ 
-DCMAKE_C_COMPILER=clang -DSTOP_BUILD_ON_WARNING=OFF 
-DCMAKE_EXPORT_COMPILE_COMMANDS=1
+          make
+      - name: Check clang-tidy
+        run: cd build && make check-clang-tidy
+      - name: Check clang-format
+        run: cd build && make check-format
 
   license-check:
     name: "License Check"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index af5b9f406..63e144b93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@ SET(CPACK_PACKAGE_VERSION_MINOR "1")
 SET(CPACK_PACKAGE_VERSION_PATCH "0-SNAPSHOT")
 SET(ORC_VERSION 
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} 
"${PROJECT_SOURCE_DIR}/cmake_modules")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy.
 
 option (BUILD_JAVA
     "Include ORC Java library in the build process"
@@ -210,3 +211,5 @@ if (BUILD_CPP_TESTS)
     )
   endif ()
 endif ()
+
+INCLUDE(CheckFormat)
\ No newline at end of file
diff --git a/c++/build-support/REAMD.md b/c++/build-support/REAMD.md
new file mode 100644
index 000000000..0ffad788e
--- /dev/null
+++ b/c++/build-support/REAMD.md
@@ -0,0 +1,30 @@
+# Build support
+
+The Python scripts in this folder provide capabilities for formatting and linting code.
+Make sure you have installed `clang-format-13`, `clang-tidy-13` and `clang-apply-replacements-13`, and that CMake can find them.
+We pin the tool versions because different versions may generate different results.
+
+## clang-format
+
+To use `run_clang_format.py`, run the following commands:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1
+make check-format # Do checks only
+make format # This would apply suggested changes, take care!
+```
+
+## clang-tidy
+
+To use `run_clang_tidy.py`, run the following commands:
+
+```shell
+mkdir build
+cd build
+cmake .. -DBUILD_JAVA=OFF -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -DCMAKE_EXPORT_COMPILE_COMMANDS=1
+make -j`nproc` # Important
+make check-clang-tidy # Do checks only
+make fix-clang-tidy # This would apply suggested changes, take care!
+```
diff --git a/c++/build-support/run_clang_format.py 
b/c++/build-support/run_clang_format.py
new file mode 100644
index 000000000..52d2e6b25
--- /dev/null
+++ b/c++/build-support/run_clang_format.py
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import codecs
+import difflib
+import fnmatch
+import os
+import subprocess
+import sys
+
+
+def check(arguments, source_dir):
+    """Check or fix the clang-format style of the sources under a directory.
+
+    Walks ``source_dir`` recursively and collects every ``.hh``/``.cc``
+    file whose path matches none of the patterns in the module-level
+    ``exclude_globs`` list (filled in by the ``__main__`` section).
+    With ``arguments.fix`` the files are re-formatted in place by a single
+    clang-format invocation; otherwise each file is compared with
+    clang-format's output and a unified diff is written to stderr for
+    every file that differs.
+
+    Args:
+        arguments: Parsed command-line namespace; ``fix``, ``quiet`` and
+            ``clang_format_binary`` are read.
+        source_dir: Root directory to scan.
+
+    Returns:
+        True if at least one file differs from its formatted version
+        (check mode only), False otherwise. Fix mode always returns False.
+
+    Raises:
+        subprocess.CalledProcessError: If clang-format exits non-zero.
+        UnicodeDecodeError: If a file or clang-format's output is not
+            valid UTF-8 (check mode only).
+    """
+    formatted_filenames = []
+    error = False
+    for directory, _, filenames in os.walk(source_dir):
+        fullpaths = (os.path.join(directory, filename)
+                     for filename in filenames)
+        source_files = [x for x in fullpaths
+                        if x.endswith((".hh", ".cc"))]
+        formatted_filenames.extend(
+            # Filter out files that match the globs in the globs file
+            [filename for filename in source_files
+             if not any(fnmatch.fnmatch(filename, exclude_glob)
+                        for exclude_glob in exclude_globs)])
+
+    if not formatted_filenames:
+        # Nothing matched. Return early so that fix mode never invokes
+        # "clang-format -i" without any file argument.
+        return error
+
+    if arguments.fix:
+        if not arguments.quiet:
+            # Print out each file on its own line, but run
+            # clang format once for all of the files
+            print("\n".join("Formatting {}".format(x)
+                            for x in formatted_filenames))
+        subprocess.check_call([arguments.clang_format_binary,
+                               "-i"] + formatted_filenames)
+    else:
+        for filename in formatted_filenames:
+            if not arguments.quiet:
+                print("Checking {}".format(filename))
+            # Both sides of the diff are read as bytes and decoded
+            # explicitly as UTF-8. subprocess.check_output returns 'str'
+            # on Python 2 but 'bytes' on Python 3, and its 'encoding'
+            # argument does not exist on Python 2, so decoding by hand is
+            # the simplest way to give difflib.unified_diff acceptable
+            # string types on both versions.
+            with open(filename, "rb") as reader:
+                # Read the original file
+                original = codecs.decode(reader.read(), "utf-8")
+            # Run clang-format and capture its output
+            formatted = codecs.decode(
+                subprocess.check_output(
+                    [arguments.clang_format_binary, filename]),
+                "utf-8")
+            # Run the equivalent of diff -u
+            diff = list(difflib.unified_diff(
+                original.splitlines(True),
+                formatted.splitlines(True),
+                fromfile=filename,
+                tofile="{} (after clang format)".format(filename)))
+            if diff:
+                print("{} had clang-format style issues".format(filename))
+                # Print out the diff to stderr
+                error = True
+                sys.stderr.writelines(diff)
+    return error
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, load the optional
+    # exclusion globs, run check() on every requested directory and exit
+    # with status 1 if any directory reported a style issue.
+    parser = argparse.ArgumentParser(
+        description="Runs clang-format on all of the source files. "
+        "If --fix is specified, re-formats the files in place; "
+        "otherwise compares the formatted output with the existing "
+        "file, outputting a unified diff if there are any "
+        "necessary changes")
+    parser.add_argument("clang_format_binary",
+                        help="Path to the clang-format binary")
+    parser.add_argument("--exclude_globs",
+                        help="Filename containing globs for files "
+                        "that should be excluded from the checks")
+    # Required: without it there is nothing to scan, and a missing value
+    # would otherwise only surface as an AttributeError further down.
+    parser.add_argument("--source_dirs", required=True,
+                        help="Comma-separated root directories of the code")
+    parser.add_argument("--fix", default=False,
+                        action="store_true",
+                        help="If specified, will re-format the source "
+                        "code instead of comparing the re-formatted "
+                        "output, defaults to %(default)s")
+    parser.add_argument("--quiet", default=False,
+                        action="store_true",
+                        help="If specified, only print errors")
+
+    args = parser.parse_args()
+
+    had_err = False
+    # Module-level list that check() reads when filtering file names.
+    exclude_globs = []
+    if args.exclude_globs:
+        # Use a context manager so the file handle is closed promptly.
+        with open(args.exclude_globs) as globs_file:
+            for line in globs_file:
+                pattern = line.strip()
+                # Skip blank lines and lines starting with "#".
+                if pattern == "" or line[0] == "#":
+                    continue
+                exclude_globs.append(pattern)
+
+    for source_dir in args.source_dirs.split(','):
+        if len(source_dir) > 0:
+            # Call check() first so that "or" cannot short-circuit it
+            # away: every directory is inspected even after a failure.
+            had_err = check(args, source_dir) or had_err
+
+    sys.exit(1 if had_err else 0)
\ No newline at end of file
diff --git a/run_clang_tidy.py b/c++/build-support/run_clang_tidy.py
old mode 100644
new mode 100755
similarity index 100%
rename from run_clang_tidy.py
rename to c++/build-support/run_clang_tidy.py
diff --git a/c++/src/CpuInfoUtil.cc b/c++/src/CpuInfoUtil.cc
index 82669de20..588f8dc96 100644
--- a/c++/src/CpuInfoUtil.cc
+++ b/c++/src/CpuInfoUtil.cc
@@ -74,7 +74,7 @@ namespace orc {
 
 #if defined(_WIN32)
     //------------------------------ WINDOWS ------------------------------//
-    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cacheSizes) {
       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
       DWORD buffer_size = 0;
@@ -108,8 +108,8 @@ namespace orc {
         if (RelationCache == buffer_position->Relationship) {
           PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
           if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
-            const int64_t current = (*cache_sizes)[cache->Level - 1];
-            (*cache_sizes)[cache->Level - 1] = std::max<int64_t>(current, 
cache->Size);
+            const int64_t current = (*cacheSizes)[cache->Level - 1];
+            (*cacheSizes)[cache->Level - 1] = std::max<int64_t>(current, 
cache->Size);
           }
         }
         offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
@@ -136,23 +136,22 @@ namespace orc {
     }
 #endif  // MINGW
 
-    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
-                           std::string* model_name) {
+    void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+                           std::string* modelName) {
       int register_EAX_id = 1;
       int highest_valid_id = 0;
       int highest_extended_valid_id = 0;
       std::bitset<32> features_ECX;
-      std::array<int, 4> cpu_info;
+      std::array<int, 4> cpuInfo;
 
       // Get highest valid id
-      __cpuid(cpu_info.data(), 0);
-      highest_valid_id = cpu_info[0];
+      __cpuid(cpuInfo.data(), 0);
+      highest_valid_id = cpuInfo[0];
       // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
       // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
-      if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 && 
cpu_info[2] == 0x6c65746e) {
+      if (cpuInfo[1] == 0x756e6547 && cpuInfo[3] == 0x49656e69 && cpuInfo[2] 
== 0x6c65746e) {
         *vendor = CpuInfo::Vendor::Intel;
-      } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
-                 cpu_info[2] == 0x444d4163) {
+      } else if (cpuInfo[1] == 0x68747541 && cpuInfo[3] == 0x69746e65 && 
cpuInfo[2] == 0x444d4163) {
         *vendor = CpuInfo::Vendor::AMD;
       }
 
@@ -161,19 +160,19 @@ namespace orc {
       }
 
       // EAX=1: Processor Info and Feature Bits
-      __cpuidex(cpu_info.data(), register_EAX_id, 0);
-      features_ECX = cpu_info[2];
+      __cpuidex(cpuInfo.data(), register_EAX_id, 0);
+      features_ECX = cpuInfo[2];
 
       // Get highest extended id
-      __cpuid(cpu_info.data(), 0x80000000);
-      highest_extended_valid_id = cpu_info[0];
+      __cpuid(cpuInfo.data(), 0x80000000);
+      highest_extended_valid_id = cpuInfo[0];
 
       // Retrieve CPU model name
       if (highest_extended_valid_id >= static_cast<int>(0x80000004)) {
-        model_name->clear();
+        modelName->clear();
         for (int i = 0x80000002; i <= static_cast<int>(0x80000004); ++i) {
-          __cpuidex(cpu_info.data(), i, 0);
-          *model_name += std::string(reinterpret_cast<char*>(cpu_info.data()), 
sizeof(cpu_info));
+          __cpuidex(cpuInfo.data(), i, 0);
+          *modelName += std::string(reinterpret_cast<char*>(cpuInfo.data()), 
sizeof(cpuInfo));
         }
       }
 
@@ -184,37 +183,37 @@ namespace orc {
         zmm_enabled = (xcr0 & 0xE0) == 0xE0;
       }
 
-      if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
-      if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
-      if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
-      if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
-      if (features_ECX[28]) *hardware_flags |= CpuInfo::AVX;
+      if (features_ECX[9]) *hardwareFlags |= CpuInfo::SSSE3;
+      if (features_ECX[19]) *hardwareFlags |= CpuInfo::SSE4_1;
+      if (features_ECX[20]) *hardwareFlags |= CpuInfo::SSE4_2;
+      if (features_ECX[23]) *hardwareFlags |= CpuInfo::POPCNT;
+      if (features_ECX[28]) *hardwareFlags |= CpuInfo::AVX;
 
       // cpuid with EAX=7, ECX=0: Extended Features
       register_EAX_id = 7;
       if (highest_valid_id > register_EAX_id) {
-        __cpuidex(cpu_info.data(), register_EAX_id, 0);
-        std::bitset<32> features_EBX = cpu_info[1];
+        __cpuidex(cpuInfo.data(), register_EAX_id, 0);
+        std::bitset<32> features_EBX = cpuInfo[1];
 
-        if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
-        if (features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
-        if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
+        if (features_EBX[3]) *hardwareFlags |= CpuInfo::BMI1;
+        if (features_EBX[5]) *hardwareFlags |= CpuInfo::AVX2;
+        if (features_EBX[8]) *hardwareFlags |= CpuInfo::BMI2;
         if (zmm_enabled) {
-          if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
-          if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
-          if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
-          if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
-          if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
+          if (features_EBX[16]) *hardwareFlags |= CpuInfo::AVX512F;
+          if (features_EBX[17]) *hardwareFlags |= CpuInfo::AVX512DQ;
+          if (features_EBX[28]) *hardwareFlags |= CpuInfo::AVX512CD;
+          if (features_EBX[30]) *hardwareFlags |= CpuInfo::AVX512BW;
+          if (features_EBX[31]) *hardwareFlags |= CpuInfo::AVX512VL;
         }
       }
     }
 
 #elif defined(CPUINFO_ARCH_ARM)
     // Windows on Arm
-    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
-                           std::string* model_name) {
-      *hardware_flags |= CpuInfo::ASIMD;
-      // TODO: vendor, model_name
+    void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+                           std::string* modelName) {
+      *hardwareFlags |= CpuInfo::ASIMD;
+      // TODO: vendor, modelName
     }
 #endif
 
@@ -236,25 +235,25 @@ namespace orc {
       return std::nullopt;
     }
 
-    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+    void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cacheSizes) {
       static_assert(kCacheLevels >= 3, "");
       auto c = IntegerSysCtlByName("hw.l1dcachesize");
       if (c.has_value()) {
-        (*cache_sizes)[0] = *c;
+        (*cacheSizes)[0] = *c;
       }
       c = IntegerSysCtlByName("hw.l2cachesize");
       if (c.has_value()) {
-        (*cache_sizes)[1] = *c;
+        (*cacheSizes)[1] = *c;
       }
       c = IntegerSysCtlByName("hw.l3cachesize");
       if (c.has_value()) {
-        (*cache_sizes)[2] = *c;
+        (*cacheSizes)[2] = *c;
       }
     }
 
-    void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
-                           std::string* model_name) {
-      // hardware_flags
+    void OsRetrieveCpuInfo(int64_t* hardwareFlags, CpuInfo::Vendor* vendor,
+                           std::string* modelName) {
+      // hardwareFlags
       struct SysCtlCpuFeature {
         const char* name;
         int64_t flag;
@@ -280,13 +279,13 @@ namespace orc {
       for (const auto& feature : features) {
         auto v = IntegerSysCtlByName(feature.name);
         if (v.value_or(0)) {
-          *hardware_flags |= feature.flag;
+          *hardwareFlags |= feature.flag;
         }
       }
 
-      // TODO: vendor, model_name
+      // TODO: vendor, modelName
       *vendor = CpuInfo::Vendor::Unknown;
-      *model_name = "Unknown";
+      *modelName = "Unknown";
     }
 
 #else
@@ -345,7 +344,7 @@ namespace orc {
       const struct {
         std::string name;
         int64_t flag;
-      } flag_mappings[] = {
+      } flagMappings[] = {
 #if defined(CPUINFO_ARCH_X86)
         {"ssse3", CpuInfo::SSSE3},
         {"sse4_1", CpuInfo::SSE4_1},
@@ -364,12 +363,12 @@ namespace orc {
         {"asimd", CpuInfo::ASIMD},
 #endif
       };
-      const int64_t num_flags = sizeof(flag_mappings) / 
sizeof(flag_mappings[0]);
+      const int64_t num_flags = sizeof(flagMappings) / sizeof(flagMappings[0]);
 
       int64_t flags = 0;
       for (int i = 0; i < num_flags; ++i) {
-        if (values.find(flag_mappings[i].name) != std::string::npos) {
-          flags |= flag_mappings[i].flag;
+        if (values.find(flagMappings[i].name) != std::string::npos) {
+          flags |= flagMappings[i].flag;
         }
       }
       return flags;
@@ -469,9 +468,9 @@ namespace orc {
 
 #elif defined(CPUINFO_ARCH_ARM)
     //------------------------------ AARCH64 ------------------------------//
-    bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* 
hardware_flags) {
-      if (simd_level == "NONE") {
-        *hardware_flags &= ~CpuInfo::ASIMD;
+    bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t* 
hardwareFlags) {
+      if (simdLevel == "NONE") {
+        *hardwareFlags &= ~CpuInfo::ASIMD;
         return true;
       }
       return false;
@@ -485,7 +484,7 @@ namespace orc {
 
 #else
     //------------------------------ PPC, ... ------------------------------//
-    bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* 
hardware_flags) {
+    bool ArchParseUserSimdLevel(const std::string& simdLevel, int64_t* 
hardwareFlags) {
       return true;
     }
 
@@ -496,17 +495,17 @@ namespace orc {
   }  // namespace
 
   struct CpuInfo::Impl {
-    int64_t hardware_flags = 0;
+    int64_t hardwareFlags = 0;
     int numCores = 0;
-    int64_t original_hardware_flags = 0;
+    int64_t originalHardwareFlags = 0;
     Vendor vendor = Vendor::Unknown;
-    std::string model_name = "Unknown";
-    std::array<int64_t, kCacheLevels> cache_sizes{};
+    std::string modelName = "Unknown";
+    std::array<int64_t, kCacheLevels> cacheSizes{};
 
     Impl() {
-      OsRetrieveCacheSize(&cache_sizes);
-      OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
-      original_hardware_flags = hardware_flags;
+      OsRetrieveCacheSize(&cacheSizes);
+      OsRetrieveCpuInfo(&hardwareFlags, &vendor, &modelName);
+      originalHardwareFlags = hardwareFlags;
       numCores = 
std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
 
       // parse user simd level
@@ -514,7 +513,7 @@ namespace orc {
       std::string userSimdLevel = maybe_env_var == nullptr ? "NONE" : 
std::string(maybe_env_var);
       std::transform(userSimdLevel.begin(), userSimdLevel.end(), 
userSimdLevel.begin(),
                      [](unsigned char c) { return std::toupper(c); });
-      if (!ArchParseUserSimdLevel(userSimdLevel, &hardware_flags)) {
+      if (!ArchParseUserSimdLevel(userSimdLevel, &hardwareFlags)) {
         throw ParseError("Invalid value for ORC_USER_SIMD_LEVEL: " + 
userSimdLevel);
       }
     }
@@ -530,8 +529,8 @@ namespace orc {
 #endif
 
   const CpuInfo* CpuInfo::getInstance() {
-    static CpuInfo cpu_info;
-    return &cpu_info;
+    static CpuInfo cpuInfo;
+    return &cpuInfo;
   }
 
 #ifdef __clang__
@@ -539,7 +538,7 @@ namespace orc {
 #endif
 
   int64_t CpuInfo::hardwareFlags() const {
-    return impl_->hardware_flags;
+    return impl_->hardwareFlags;
   }
 
   int CpuInfo::numCores() const {
@@ -551,7 +550,7 @@ namespace orc {
   }
 
   const std::string& CpuInfo::modelName() const {
-    return impl_->model_name;
+    return impl_->modelName;
   }
 
   int64_t CpuInfo::cacheSize(CacheLevel level) const {
@@ -564,18 +563,18 @@ namespace orc {
 
     static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
     const int i = static_cast<int>(level);
-    if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i];
+    if (impl_->cacheSizes[i] > 0) return impl_->cacheSizes[i];
     if (i == 0) return kDefaultCacheSizes[0];
     // l3 may be not available, return maximum of l2 or default size
-    return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]);
+    return std::max(kDefaultCacheSizes[i], impl_->cacheSizes[i - 1]);
   }
 
   bool CpuInfo::isSupported(int64_t flags) const {
-    return (impl_->hardware_flags & flags) == flags;
+    return (impl_->hardwareFlags & flags) == flags;
   }
 
   bool CpuInfo::isDetected(int64_t flags) const {
-    return (impl_->original_hardware_flags & flags) == flags;
+    return (impl_->originalHardwareFlags & flags) == flags;
   }
 
   void CpuInfo::verifyCpuRequirements() const {
diff --git a/c++/src/OrcHdfsFile.cc b/c++/src/OrcHdfsFile.cc
index 09ff71a0e..d878e276c 100644
--- a/c++/src/OrcHdfsFile.cc
+++ b/c++/src/OrcHdfsFile.cc
@@ -42,23 +42,23 @@ namespace orc {
 
   class HdfsFileInputStream : public InputStream {
    private:
-    std::string filename;
-    std::unique_ptr<hdfs::FileHandle> file;
-    std::unique_ptr<hdfs::FileSystem> file_system;
-    uint64_t totalLength;
-    const uint64_t READ_SIZE = 1024 * 1024;  // 1 MB
-    ReaderMetrics* metrics;
+    std::string filename_;
+    std::unique_ptr<hdfs::FileHandle> file_;
+    std::unique_ptr<hdfs::FileSystem> fileSystem_;
+    uint64_t totalLength_;
+    const uint64_t readSize_ = 1024 * 1024;  // 1 MB
+    ReaderMetrics* metrics_;
 
    public:
-    HdfsFileInputStream(std::string _filename, ReaderMetrics* _metrics) : 
metrics(_metrics) {
-      filename = _filename;
+    HdfsFileInputStream(std::string filename, ReaderMetrics* metrics) : 
metrics_(metrics) {
+      filename_ = filename;
 
       // Building a URI object from the given uri_path
       hdfs::URI uri;
       try {
-        uri = hdfs::URI::parse_from_string(filename);
+        uri = hdfs::URI::parse_from_string(filename_);
       } catch (const hdfs::uri_parse_error&) {
-        throw ParseError("Malformed URI: " + filename);
+        throw ParseError("Malformed URI: " + filename_);
       }
 
       // This sets conf path to default "$HADOOP_CONF_DIR" or 
"/etc/hadoop/conf"
@@ -82,9 +82,9 @@ namespace orc {
       }
       hdfs::IoService* io_service = hdfs::IoService::New();
       // Wrapping file_system into a unique pointer to guarantee deletion
-      file_system =
+      fileSystem_ =
           std::unique_ptr<hdfs::FileSystem>(hdfs::FileSystem::New(io_service, 
"", options));
-      if (file_system.get() == nullptr) {
+      if (fileSystem_.get() == nullptr) {
         throw ParseError("Can't create FileSystem object. ");
       }
       hdfs::Status status;
@@ -92,13 +92,13 @@ namespace orc {
       if (!uri.get_host().empty()) {
         // Using port if supplied, otherwise using "" to look up port in 
configs
         std::string port = uri.has_port() ? std::to_string(uri.get_port()) : 
"";
-        status = file_system->Connect(uri.get_host(), port);
+        status = fileSystem_->Connect(uri.get_host(), port);
         if (!status.ok()) {
           throw ParseError("Can't connect to " + uri.get_host() + ":" + port + 
". " +
                            status.ToString());
         }
       } else {
-        status = file_system->ConnectToDefaultFs();
+        status = fileSystem_->ConnectToDefaultFs();
         if (!status.ok()) {
           if (!options.defaultFS.get_host().empty()) {
             throw ParseError("Error connecting to " + options.defaultFS.str() 
+ ". " +
@@ -110,32 +110,32 @@ namespace orc {
         }
       }
 
-      if (file_system.get() == nullptr) {
+      if (fileSystem_.get() == nullptr) {
         throw ParseError("Can't connect the file system. ");
       }
 
       hdfs::FileHandle* file_raw = nullptr;
-      status = file_system->Open(uri.get_path(true), &file_raw);
+      status = fileSystem_->Open(uri.get_path(true), &file_raw);
       if (!status.ok()) {
         throw ParseError("Can't open " + uri.get_path(true) + ". " + 
status.ToString());
       }
       // Wrapping file_raw into a unique pointer to guarantee deletion
-      file.reset(file_raw);
+      file_.reset(file_raw);
 
       hdfs::StatInfo stat_info;
-      status = file_system->GetFileInfo(uri.get_path(true), stat_info);
+      status = fileSystem_->GetFileInfo(uri.get_path(true), stat_info);
       if (!status.ok()) {
         throw ParseError("Can't stat " + uri.get_path(true) + ". " + 
status.ToString());
       }
-      totalLength = stat_info.length;
+      totalLength_ = stat_info.length;
     }
 
     uint64_t getLength() const override {
-      return totalLength;
+      return totalLength_;
     }
 
     uint64_t getNaturalReadSize() const override {
-      return READ_SIZE;
+      return readSize_;
     }
 
     void read(void* buf, uint64_t length, uint64_t offset) override {
@@ -151,8 +151,8 @@ namespace orc {
 
       do {
         status =
-            file->PositionRead(buf_ptr, static_cast<size_t>(length) - 
total_bytes_read,
-                               static_cast<off_t>(offset + total_bytes_read), 
&last_bytes_read);
+            file_->PositionRead(buf_ptr, static_cast<size_t>(length) - 
total_bytes_read,
+                                static_cast<off_t>(offset + total_bytes_read), 
&last_bytes_read);
         if (!status.ok()) {
           throw ParseError("Error reading the file: " + status.ToString());
         }
@@ -162,7 +162,7 @@ namespace orc {
     }
 
     const std::string& getName() const override {
-      return filename;
+      return filename_;
     }
 
     ~HdfsFileInputStream() override;
diff --git a/cmake_modules/CheckFormat.cmake b/cmake_modules/CheckFormat.cmake
new file mode 100644
index 000000000..1aff5d765
--- /dev/null
+++ b/cmake_modules/CheckFormat.cmake
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Locate the interpreter and the LLVM tools used by the lint/format targets.
+#
+# Needed for linting targets, etc.
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3)
+set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
+
+# Directory holding run_clang_format.py and run_clang_tidy.py.
+# PROJECT_SOURCE_DIR is used (as for CMAKE_MODULE_PATH in the root
+# CMakeLists.txt) so the path stays valid when ORC is not the top-level project.
+set(BUILD_SUPPORT_DIR "${PROJECT_SOURCE_DIR}/c++/build-support")
+
+# The tool versions are pinned; CLANG_SEARCH_PATH may hint at their location.
+find_program(CLANG_FORMAT_BIN
+        NAMES clang-format-13
+        HINTS ${CLANG_SEARCH_PATH})
+
+find_program(CLANG_TIDY_BIN
+        NAMES clang-tidy-13
+        HINTS ${CLANG_SEARCH_PATH})
+
+find_program(CLANG_APPLY_REPLACEMENTS_BIN
+        NAMES clang-apply-replacements-13
+        HINTS ${CLANG_SEARCH_PATH})
+
+# if(NOT <var>) is true for the <var>-NOTFOUND value that find_program stores
+# on failure, and also for an empty or unset variable.
+if(NOT CLANG_FORMAT_BIN)
+        message(WARNING "Couldn't find clang-format.")
+else()
+        message(STATUS "Found clang-format at ${CLANG_FORMAT_BIN}")
+endif()
+
+if(NOT CLANG_TIDY_BIN)
+        message(WARNING "Couldn't find clang-tidy.")
+else()
+        # Output compile_commands.json
+        set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+        message(STATUS "Found clang-tidy at ${CLANG_TIDY_BIN}")
+endif()
+
+if(NOT CLANG_APPLY_REPLACEMENTS_BIN)
+        message(WARNING "Couldn't find clang-apply-replacements.")
+else()
+        # Output compile_commands.json
+        set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
+        message(STATUS "Found clang-apply-replacements at ${CLANG_APPLY_REPLACEMENTS_BIN}")
+endif()
+
+if(NOT LINT_EXCLUSIONS_FILE)
+        # source files matching a glob from a line in this file
+        # will be excluded from linting (cpplint, clang-tidy, clang-format)
+        set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
+endif()
+
+# Runs clang-tidy and exits with a non-zero exit code if any errors are found.
+# Note that clang-tidy automatically looks for a .clang-tidy file in parent
+# directories.
+add_custom_target(check-clang-tidy
+        COMMAND
+        ${PYTHON_EXECUTABLE}
+        ${BUILD_SUPPORT_DIR}/run_clang_tidy.py # run LLVM's clang-tidy script
+        -clang-tidy-binary ${CLANG_TIDY_BIN} # using our clang-tidy binary
+        -p ${CMAKE_BINARY_DIR} # using cmake's generated compile commands
+        VERBATIM # platform-independent argument escaping
+)
+
+# Same invocation as check-clang-tidy, but additionally applies the suggested
+# changes to the sources through clang-apply-replacements.
+add_custom_target(fix-clang-tidy
+        COMMAND
+        ${PYTHON_EXECUTABLE}
+        ${BUILD_SUPPORT_DIR}/run_clang_tidy.py # run LLVM's clang-tidy script
+        -clang-tidy-binary ${CLANG_TIDY_BIN} # using our clang-tidy binary
+        -p ${CMAKE_BINARY_DIR} # using cmake's generated compile commands
+        # using our clang-apply-replacements binary
+        -clang-apply-replacements-binary ${CLANG_APPLY_REPLACEMENTS_BIN}
+        -fix # apply suggested changes generated by clang-tidy
+        VERBATIM # platform-independent argument escaping
+)
+
+# Comma-separated list of the directories handed to run_clang_format.py.
+# PROJECT_SOURCE_DIR is used (as for CMAKE_MODULE_PATH in the root
+# CMakeLists.txt) so the paths stay valid when ORC is not the top-level project.
+string(CONCAT ORC_FORMAT_DIRS
+        "${PROJECT_SOURCE_DIR}/c++,"
+        "${PROJECT_SOURCE_DIR}/tools"
+)
+
+# Forward the lint exclusion list to run_clang_format.py, but only when the
+# file exists: the script opens whatever path --exclude_globs names.
+set(ORC_FORMAT_EXCLUDE_ARGS)
+if(LINT_EXCLUSIONS_FILE AND EXISTS "${LINT_EXCLUSIONS_FILE}")
+        set(ORC_FORMAT_EXCLUDE_ARGS --exclude_globs "${LINT_EXCLUSIONS_FILE}")
+endif()
+
+# Re-formats the sources in place with clang-format.
+add_custom_target(format
+        COMMAND
+        ${PYTHON_EXECUTABLE}
+        ${BUILD_SUPPORT_DIR}/run_clang_format.py
+        ${CLANG_FORMAT_BIN}
+        --source_dirs
+        ${ORC_FORMAT_DIRS}
+        ${ORC_FORMAT_EXCLUDE_ARGS}
+        --fix
+        VERBATIM # platform-independent argument escaping
+)
+
+# Runs clang format and exits with a non-zero exit code if any files need to
+# be reformatted
+add_custom_target(check-format
+        COMMAND
+        ${PYTHON_EXECUTABLE}
+        ${BUILD_SUPPORT_DIR}/run_clang_format.py
+        ${CLANG_FORMAT_BIN}
+        --source_dirs
+        ${ORC_FORMAT_DIRS}
+        ${ORC_FORMAT_EXCLUDE_ARGS}
+        VERBATIM # platform-independent argument escaping
+)
\ No newline at end of file

Reply via email to