[ https://issues.apache.org/jira/browse/PARQUET-1165?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16286492#comment-16286492 ]
ASF GitHub Bot commented on PARQUET-1165: ----------------------------------------- wesm closed pull request #420: PARQUET-1165: Pin clang-format version to 4.0 URL: https://github.com/apache/parquet-cpp/pull/420 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/CMakeLists.txt b/CMakeLists.txt index c524ceb5..c53a6e33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,6 +84,7 @@ enable_testing() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") +set(CLANG_FORMAT_VERSION "4.0") find_package(ClangTools) if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR CLANG_TIDY_FOUND) # Generate a Clang compile_commands.json "compilation database" file for use @@ -483,16 +484,19 @@ endif (UNIX) # "make format" and "make check-format" targets ############################################################ -if (${CLANG_FORMAT_FOUND}) - # runs clang format and updates files in place. - add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1 - `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`) +# runs clang format and updates files in place. +add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py + ${CLANG_FORMAT_BIN} + ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt + ${CMAKE_CURRENT_SOURCE_DIR}/src) - # runs clang format and exits with a non-zero exit code if any files need to be reformatted - add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0 - `find ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/tools ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`) -endif() +# runs clang format and exits with a non-zero exit code if any files need to be reformatted +# TODO(wesm): Make this work in run_clang_format.py +add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py + ${CLANG_FORMAT_BIN} + ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt + ${CMAKE_CURRENT_SOURCE_DIR}/src 1) ############################################################ # "make clang-tidy" and "make check-clang-tidy" targets diff --git a/build-support/clang_format_exclusions.txt b/build-support/clang_format_exclusions.txt new file mode 100644 index 00000000..ce72d2f1 --- /dev/null +++ b/build-support/clang_format_exclusions.txt @@ -0,0 +1 @@ +*_generated* diff --git a/build-support/run_clang_format.py b/build-support/run_clang_format.py new file mode 100755 index 00000000..6dec34bd --- /dev/null +++ b/build-support/run_clang_format.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import fnmatch +import os +import subprocess +import sys + +if len(sys.argv) < 4: + sys.stderr.write("Usage: %s $CLANG_FORMAT_VERSION exclude_globs.txt " + "$source_dir\n" % + sys.argv[0]) + sys.exit(1) + +CLANG_FORMAT = sys.argv[1] +EXCLUDE_GLOBS_FILENAME = sys.argv[2] +SOURCE_DIR = sys.argv[3] + +if len(sys.argv) > 4: + CHECK_FORMAT = int(sys.argv[4]) == 1 +else: + CHECK_FORMAT = False + + +exclude_globs = [line.strip() for line in open(EXCLUDE_GLOBS_FILENAME, "r")] + +files_to_format = [] +matches = [] +for directory, subdirs, files in os.walk(SOURCE_DIR): + for name in files: + name = os.path.join(directory, name) + if not (name.endswith('.h') or name.endswith('.cc')): + continue + + excluded = False + for g in exclude_globs: + if fnmatch.fnmatch(name, g): + excluded = True + break + if not excluded: + files_to_format.append(name) + +if CHECK_FORMAT: + output = subprocess.check_output([CLANG_FORMAT, '-output-replacements-xml'] + + files_to_format, + stderr=subprocess.STDOUT).decode('utf8') + + to_fix = [] + for line in output.split('\n'): + if 'offset' in line: + to_fix.append(line) + + if len(to_fix) > 0: + print("clang-format checks failed, run 'make format' to fix") + sys.exit(-1) +else: + try: + cmd = [CLANG_FORMAT, '-i'] + files_to_format + subprocess.check_output(cmd, stderr=subprocess.STDOUT) + except Exception as e: + print(e) + print(' '.join(cmd)) + raise diff --git a/cmake_modules/FindClangTools.cmake b/cmake_modules/FindClangTools.cmake index e4ee984e..e9221ff2 100644 --- a/cmake_modules/FindClangTools.cmake +++ b/cmake_modules/FindClangTools.cmake @@ -31,7 +31,12 @@ # CLANG_TIDY_FOUND, Whether clang format was found find_program(CLANG_TIDY_BIN - NAMES clang-tidy-3.9 clang-tidy-3.8 clang-tidy-3.7 clang-tidy-3.6 clang-tidy + NAMES clang-tidy-4.0 + clang-tidy-3.9 + clang-tidy-3.8 + clang-tidy-3.7 + clang-tidy-3.6 + clang-tidy PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin NO_DEFAULT_PATH ) @@ -44,11 +49,47 @@ else() message("clang-tidy found at ${CLANG_TIDY_BIN}") endif() -find_program(CLANG_FORMAT_BIN - NAMES clang-format-3.9 clang-format-3.8 clang-format-3.7 clang-format-3.6 clang-format - PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin - NO_DEFAULT_PATH -) +if (CLANG_FORMAT_VERSION) + find_program(CLANG_FORMAT_BIN + NAMES clang-format-${CLANG_FORMAT_VERSION} + PATHS + ${ClangTools_PATH} + $ENV{CLANG_TOOLS_PATH} + /usr/local/bin /usr/bin + NO_DEFAULT_PATH + ) + + # If not found yet, search alternative locations + if (("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND") AND APPLE) + # Homebrew ships older LLVM versions in /usr/local/opt/llvm@version/ + STRING(REGEX REPLACE "^([0-9]+)\\.[0-9]+" "\\1" CLANG_FORMAT_MAJOR_VERSION "${CLANG_FORMAT_VERSION}") + STRING(REGEX REPLACE "^[0-9]+\\.([0-9]+)" "\\1" CLANG_FORMAT_MINOR_VERSION "${CLANG_FORMAT_VERSION}") + if ("${CLANG_FORMAT_MINOR_VERSION}" STREQUAL "0") + find_program(CLANG_FORMAT_BIN + NAMES clang-format + PATHS /usr/local/opt/llvm@${CLANG_FORMAT_MAJOR_VERSION}/bin + NO_DEFAULT_PATH + ) + else() + find_program(CLANG_FORMAT_BIN + NAMES clang-format + PATHS /usr/local/opt/llvm@${CLANG_FORMAT_VERSION}/bin + NO_DEFAULT_PATH + ) + endif() + endif() +else() + find_program(CLANG_FORMAT_BIN + NAMES clang-format-4.0 + clang-format-3.9 + clang-format-3.8 + clang-format-3.7 + clang-format-3.6 + clang-format + PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin + NO_DEFAULT_PATH + ) +endif() if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" ) set(CLANG_FORMAT_FOUND 0) diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh index f9ad1a39..4d3b7520 100755 --- a/dev/release/run-rat.sh +++ b/dev/release/run-rat.sh @@ -31,6 +31,7 @@ $RAT $1 \ -e asan_symbolize.py \ -e cpplint.py \ -e pax_global_header \ + -e clang_format_exclusions.txt \ > rat.txt cat rat.txt UNAPPROVED=`cat rat.txt | grep "Unknown Licenses" | head -n 1 | cut -d " " -f 1` diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc index 0e0831ec..751ed49e 100644 --- a/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/src/parquet/arrow/arrow-reader-writer-test.cc @@ -23,8 +23,8 @@ #include "gtest/gtest.h" -#include <sstream> #include <arrow/compute/api.h> +#include <sstream> #include "parquet/api/reader.h" #include "parquet/api/writer.h" diff --git a/src/parquet/arrow/arrow-schema-test.cc b/src/parquet/arrow/arrow-schema-test.cc index 7ed9ad83..129eccfd 100644 --- a/src/parquet/arrow/arrow-schema-test.cc +++ b/src/parquet/arrow/arrow-schema-test.cc @@ -62,8 +62,8 @@ class TestConvertParquetSchema : public ::testing::Test { for (int i = 0; i < expected_schema->num_fields(); ++i) { auto lhs = result_schema_->field(i); auto rhs = expected_schema->field(i); - EXPECT_TRUE(lhs->Equals(rhs)) - << i << " " << lhs->ToString() << " != " << rhs->ToString(); + EXPECT_TRUE(lhs->Equals(rhs)) << i << " " << lhs->ToString() + << " != " << rhs->ToString(); } } diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc index 4ec48a45..9b9bde9f 100644 --- a/src/parquet/file/reader.cc +++ b/src/parquet/file/reader.cc @@ -45,9 +45,9 @@ RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents) : contents_(std::move(contents)) {} std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) { - DCHECK(i < metadata()->num_columns()) - << "The RowGroup only has " << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " + << metadata()->num_columns() + << "columns, requested column: " << i; const ColumnDescriptor* descr = metadata()->schema()->Column(i); std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i); @@ -57,9 +57,9 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) { } std::unique_ptr<PageReader> RowGroupReader::GetColumnPageReader(int i) { - DCHECK(i < metadata()->num_columns()) - << "The RowGroup only has " << metadata()->num_columns() - << "columns, requested column: " << i; + DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " + << metadata()->num_columns() + << "columns, requested column: " << i; return contents_->GetColumnPageReader(i); } @@ -127,9 +127,9 @@ std::shared_ptr<FileMetaData> ParquetFileReader::metadata() const { } std::shared_ptr<RowGroupReader> ParquetFileReader::RowGroup(int i) { - DCHECK(i < metadata()->num_row_groups()) - << "The file only has " << metadata()->num_row_groups() - << "row groups, requested reader for: " << i; + DCHECK(i < metadata()->num_row_groups()) << "The file only has " + << metadata()->num_row_groups() + << "row groups, requested reader for: " << i; return contents_->GetRowGroup(i); } diff --git a/src/parquet/statistics-test.cc b/src/parquet/statistics-test.cc index 26d53d32..fc905b51 100644 --- a/src/parquet/statistics-test.cc +++ b/src/parquet/statistics-test.cc @@ -194,9 +194,8 @@ bool* TestRowGroupStatistics<BooleanType>::GetValuesPointer(std::vector<bool>& v } template <typename TestType> -typename std::vector<typename TestType::c_type> -TestRowGroupStatistics<TestType>::GetDeepCopy( - const std::vector<typename TestType::c_type>& values) { +typename std::vector<typename TestType::c_type> TestRowGroupStatistics< + TestType>::GetDeepCopy(const std::vector<typename TestType::c_type>& values) { return values; } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [C++] Pin clang-format version to 4.0 > ------------------------------------- > > Key: PARQUET-1165 > URL: https://issues.apache.org/jira/browse/PARQUET-1165 > Project: Parquet > Issue Type: Improvement > Components: parquet-cpp > Reporter: Wes McKinney > Assignee: Uwe L. Korn > Fix For: cpp-1.4.0 > > -- This message was sent by Atlassian JIRA (v6.4.14#64029)