Repository: parquet-cpp Updated Branches: refs/heads/master dbf1cf1cc -> cc5c120df
PARQUET-496: Fix cpplint configuration to catch more style errors We were using `--verbose=4` which catches only egregious style errors. `--verbose=2` with a few warning types suppressed is much more reasonable. This patch updates the cmake configuration and fixes the existing style errors to get the build passing. Author: Wes McKinney <[email protected]> Closes #33 from wesm/PARQUET-496 and squashes the following commits: de674d4 [Wes McKinney] Use --verbose=2 for cpplint and fix outstanding style issues Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/cc5c120d Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/cc5c120d Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/cc5c120d Branch: refs/heads/master Commit: cc5c120dfcf8a99022708a38f727c034077a8425 Parents: dbf1cf1 Author: Wes McKinney <[email protected]> Authored: Mon Feb 1 12:04:16 2016 -0800 Committer: Nong Li <[email protected]> Committed: Mon Feb 1 12:04:16 2016 -0800 ---------------------------------------------------------------------- CMakeLists.txt | 5 +++-- src/parquet/column/reader.cc | 10 ++++++---- src/parquet/column/reader.h | 1 - src/parquet/column/scanner.cc | 1 - src/parquet/column/scanner.h | 8 ++++---- src/parquet/encodings/delta-bit-pack-encoding.h | 2 -- src/parquet/encodings/plain-encoding.h | 4 ++-- src/parquet/reader-test.cc | 5 +++-- src/parquet/reader.cc | 10 ++++++---- src/parquet/util/bit-util.h | 1 - src/parquet/util/input_stream.cc | 2 +- 11 files changed, 25 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 94e73dd..5298e0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -196,8 +196,9 @@ include_directories( if (UNIX) # Full lint add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py - --verbose=4 - --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order + --verbose=2 + --linelength=90 + --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/include_order,-runtime/references,-readability/check `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/parquet\\/thrift/g'`) endif (UNIX) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/column/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc index bc19464..edfea49 100644 --- a/src/parquet/column/reader.cc +++ b/src/parquet/column/reader.cc @@ -43,7 +43,6 @@ ColumnReader::ColumnReader(const parquet::ColumnMetaData* metadata, stream_(std::move(stream)), num_buffered_values_(0), num_decoded_values_(0) { - switch (metadata->codec) { case CompressionCodec::UNCOMPRESSED: break; @@ -103,7 +102,8 @@ bool TypedColumnReader<TYPE>::ReadNewPage() { PlainDecoder<TYPE> dictionary(schema_); dictionary.SetData(current_page_header_.dictionary_page_header.num_values, buffer, uncompressed_len); - std::shared_ptr<DecoderType> decoder(new DictionaryDecoder<TYPE>(schema_, &dictionary)); + std::shared_ptr<DecoderType> decoder( + new DictionaryDecoder<TYPE>(schema_, &dictionary)); decoders_[Encoding::RLE_DICTIONARY] = decoder; current_decoder_ = decoders_[Encoding::RLE_DICTIONARY].get(); @@ -222,9 +222,11 @@ std::shared_ptr<ColumnReader> ColumnReader::Make(const parquet::ColumnMetaData* case Type::DOUBLE: return std::make_shared<DoubleReader>(metadata, element, std::move(stream)); case Type::BYTE_ARRAY: - return std::make_shared<ByteArrayReader>(metadata, element, std::move(stream)); + return std::make_shared<ByteArrayReader>(metadata, element, + std::move(stream)); case Type::FIXED_LEN_BYTE_ARRAY: - return std::make_shared<FixedLenByteArrayReader>(metadata, element, std::move(stream)); + return std::make_shared<FixedLenByteArrayReader>(metadata, element, + std::move(stream)); default: ParquetException::NYI("type reader not implemented"); } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/column/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h index 0976836..8f857c4 100644 --- a/src/parquet/column/reader.h +++ b/src/parquet/column/reader.h @@ -52,7 +52,6 @@ class Scanner; class ColumnReader { public: - struct Config { int batch_size; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/column/scanner.cc ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner.cc b/src/parquet/column/scanner.cc index 158430b..b263d1e 100644 --- a/src/parquet/column/scanner.cc +++ b/src/parquet/column/scanner.cc @@ -51,7 +51,6 @@ std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader, } // Unreachable code, but supress compiler warning return std::shared_ptr<Scanner>(nullptr); - } } // namespace parquet_cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/column/scanner.h ---------------------------------------------------------------------- diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h index 64de021..8c47a8c 100644 --- a/src/parquet/column/scanner.h +++ b/src/parquet/column/scanner.h @@ -101,8 +101,8 @@ class TypedScanner : public Scanner { bool NextLevels(int16_t* def_level, int16_t* rep_level) { if (level_offset_ == levels_buffered_) { - levels_buffered_ = typed_reader_->ReadBatch(batch_size_, &def_levels_[0], &rep_levels_[0], - values_, &values_buffered_); + levels_buffered_ = typed_reader_->ReadBatch(batch_size_, &def_levels_[0], + &rep_levels_[0], values_, &values_buffered_); // TODO: repetition levels @@ -151,9 +151,9 @@ class TypedScanner : public Scanner { if (is_null) { std::string null_fmt = format_fwf<parquet::Type::BYTE_ARRAY>(width); - snprintf(buffer, 25, null_fmt.c_str(), "NULL"); + snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL"); } else { - FormatValue(&val, buffer, 25, width); + FormatValue(&val, buffer, sizeof(buffer), width); } out << buffer; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/encodings/delta-bit-pack-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/delta-bit-pack-encoding.h b/src/parquet/encodings/delta-bit-pack-encoding.h index 9f0681d..a0833b5 100644 --- a/src/parquet/encodings/delta-bit-pack-encoding.h +++ b/src/parquet/encodings/delta-bit-pack-encoding.h @@ -32,7 +32,6 @@ class DeltaBitPackDecoder : public Decoder<TYPE> { explicit DeltaBitPackDecoder(const parquet::SchemaElement* schema) : Decoder<TYPE>(schema, parquet::Encoding::DELTA_BINARY_PACKED) { - parquet::Type::type type = type_traits<TYPE>::parquet_type; if (type != parquet::Type::INT32 && type != parquet::Type::INT64) { @@ -117,7 +116,6 @@ class DeltaBitPackDecoder : public Decoder<TYPE> { int64_t last_value_; }; - } // namespace parquet_cpp #endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/encodings/plain-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h index dc71e39..e8f8977 100644 --- a/src/parquet/encodings/plain-encoding.h +++ b/src/parquet/encodings/plain-encoding.h @@ -76,8 +76,8 @@ inline int PlainDecoder<parquet::Type::BYTE_ARRAY>::Decode(ByteArray* buffer, // Template specialization for FIXED_LEN_BYTE_ARRAY template <> -inline int PlainDecoder<parquet::Type::FIXED_LEN_BYTE_ARRAY>::Decode(FixedLenByteArray* buffer, - int max_values) { +inline int PlainDecoder<parquet::Type::FIXED_LEN_BYTE_ARRAY>::Decode( + FixedLenByteArray* buffer, int max_values) { max_values = std::min(max_values, num_values_); int len = schema_->type_length; for (int i = 0; i < max_values; ++i) { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/reader-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/reader-test.cc b/src/parquet/reader-test.cc index 9952c62..49f25f0 100644 --- a/src/parquet/reader-test.cc +++ b/src/parquet/reader-test.cc @@ -60,7 +60,8 @@ TEST_F(TestAllTypesPlain, TestBatchRead) { RowGroupReader* group = reader_.RowGroup(0); // column 0, id - std::shared_ptr<Int32Reader> col = std::dynamic_pointer_cast<Int32Reader>(group->Column(0)); + std::shared_ptr<Int32Reader> col = + std::dynamic_pointer_cast<Int32Reader>(group->Column(0)); int16_t def_levels[4]; int16_t rep_levels[4]; @@ -120,7 +121,7 @@ TEST_F(TestAllTypesPlain, DebugPrintWorks) { reader_.DebugPrint(ss); std::string result = ss.str(); - ASSERT_TRUE(result.size() > 0); + ASSERT_GT(result.size(), 0); } } // namespace parquet_cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/reader.cc b/src/parquet/reader.cc index 823549d..a43a2a5 100644 --- a/src/parquet/reader.cc +++ b/src/parquet/reader.cc @@ -105,7 +105,8 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(size_t i) { source->Seek(col_start); // TODO(wesm): Law of demeter violation - ScopedInMemoryInputStream* scoped_input = static_cast<ScopedInMemoryInputStream*>(input.get()); + ScopedInMemoryInputStream* scoped_input = + static_cast<ScopedInMemoryInputStream*>(input.get()); size_t bytes_read = source->Read(scoped_input->size(), scoped_input->data()); if (bytes_read != scoped_input->size()) { std::cout << "Bytes needed: " << col.meta_data.total_compressed_size << std::endl; @@ -165,7 +166,8 @@ RowGroupReader* ParquetFileReader::RowGroup(size_t i) { } // Construct the RowGroupReader - row_group_readers_[i] = std::make_shared<RowGroupReader>(this, &metadata_.row_groups[i]); + row_group_readers_[i] = std::make_shared<RowGroupReader>(this, + &metadata_.row_groups[i]); return row_group_readers_[i].get(); } @@ -272,9 +274,9 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) { << ": " << meta_data->num_values << " rows, " << meta_data->statistics.null_count << " null values, " << meta_data->statistics.distinct_count << " distinct values, " - << "min value: " << (meta_data->statistics.min.length()>0 ? + << "min value: " << (meta_data->statistics.min.length() > 0 ? meta_data->statistics.min : "N/A") - << ", max value: " << (meta_data->statistics.max.length()>0 ? + << ", max value: " << (meta_data->statistics.max.length() > 0 ? meta_data->statistics.max : "N/A") << ".\n"; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/util/bit-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h index 8451862..593d1c2 100644 --- a/src/parquet/util/bit-util.h +++ b/src/parquet/util/bit-util.h @@ -168,7 +168,6 @@ class BitUtil { static inline int16_t FromBigEndian(int16_t val) { return val; } static inline uint16_t FromBigEndian(uint16_t val) { return val; } #endif - }; } // namespace parquet_cpp http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/cc5c120d/src/parquet/util/input_stream.cc ---------------------------------------------------------------------- diff --git a/src/parquet/util/input_stream.cc b/src/parquet/util/input_stream.cc index d0e53ed..281a342 100644 --- a/src/parquet/util/input_stream.cc +++ b/src/parquet/util/input_stream.cc @@ -24,7 +24,7 @@ namespace parquet_cpp { InMemoryInputStream::InMemoryInputStream(const uint8_t* buffer, int64_t len) : - buffer_(buffer), len_(len), offset_(0) {} + buffer_(buffer), len_(len), offset_(0) {} const uint8_t* InMemoryInputStream::Peek(int num_to_peek, int* num_bytes) { *num_bytes = std::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
