Repository: parquet-cpp Updated Branches: refs/heads/master ad56e7aea -> 782049bac
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/encodings/dictionary-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/dictionary-encoding.h b/src/parquet/encodings/dictionary-encoding.h deleted file mode 100644 index 7128500..0000000 --- a/src/parquet/encodings/dictionary-encoding.h +++ /dev/null @@ -1,473 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef PARQUET_DICTIONARY_ENCODING_H -#define PARQUET_DICTIONARY_ENCODING_H - -#include <algorithm> -#include <cstdint> -#include <iostream> -#include <limits> -#include <vector> - -#include "parquet/encodings/decoder.h" -#include "parquet/encodings/encoder.h" -#include "parquet/encodings/plain-encoding.h" -#include "parquet/util/bit-util.h" -#include "parquet/util/cpu-info.h" -#include "parquet/util/hash-util.h" -#include "parquet/util/memory.h" -#include "parquet/util/rle-encoding.h" - -namespace parquet { - -template <typename Type> -class DictionaryDecoder : public Decoder<Type> { - public: - typedef typename Type::c_type T; - - // Initializes the dictionary with values from 'dictionary'. 
The data in - // dictionary is not guaranteed to persist in memory after this call so the - // dictionary decoder needs to copy the data out if necessary. - explicit DictionaryDecoder( - const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) - : Decoder<Type>(descr, Encoding::RLE_DICTIONARY), - dictionary_(0, allocator), - byte_array_data_(AllocateBuffer(allocator, 0)) {} - - // Perform type-specific initiatialization - void SetDict(Decoder<Type>* dictionary); - - void SetData(int num_values, const uint8_t* data, int len) override { - num_values_ = num_values; - if (len == 0) return; - uint8_t bit_width = *data; - ++data; - --len; - idx_decoder_ = RleDecoder(data, len, bit_width); - } - - int Decode(T* buffer, int max_values) override { - max_values = std::min(max_values, num_values_); - int decoded_values = idx_decoder_.GetBatchWithDict(dictionary_, buffer, max_values); - if (decoded_values != max_values) { ParquetException::EofException(); } - num_values_ -= max_values; - return max_values; - } - - int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* valid_bits, - int64_t valid_bits_offset) override { - int decoded_values = idx_decoder_.GetBatchWithDictSpaced( - dictionary_, buffer, num_values, null_count, valid_bits, valid_bits_offset); - if (decoded_values != num_values) { ParquetException::EofException(); } - return decoded_values; - } - - private: - using Decoder<Type>::num_values_; - - // Only one is set. - Vector<T> dictionary_; - - // Data that contains the byte array data (byte_array_dictionary_ just has the - // pointers). 
- std::shared_ptr<PoolBuffer> byte_array_data_; - - RleDecoder idx_decoder_; -}; - -template <typename Type> -inline void DictionaryDecoder<Type>::SetDict(Decoder<Type>* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); -} - -template <> -inline void DictionaryDecoder<BooleanType>::SetDict(Decoder<BooleanType>* dictionary) { - ParquetException::NYI("Dictionary encoding is not implemented for boolean values"); -} - -template <> -inline void DictionaryDecoder<ByteArrayType>::SetDict( - Decoder<ByteArrayType>* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); - - int total_size = 0; - for (int i = 0; i < num_dictionary_values; ++i) { - total_size += dictionary_[i].len; - } - PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size)); - int offset = 0; - - uint8_t* bytes_data = byte_array_data_->mutable_data(); - for (int i = 0; i < num_dictionary_values; ++i) { - memcpy(bytes_data + offset, dictionary_[i].ptr, dictionary_[i].len); - dictionary_[i].ptr = bytes_data + offset; - offset += dictionary_[i].len; - } -} - -template <> -inline void DictionaryDecoder<FLBAType>::SetDict(Decoder<FLBAType>* dictionary) { - int num_dictionary_values = dictionary->values_left(); - dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); - - int fixed_len = descr_->type_length(); - int total_size = num_dictionary_values * fixed_len; - - PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size)); - uint8_t* bytes_data = byte_array_data_->mutable_data(); - int offset = 0; - for (int i = 0; i < num_dictionary_values; ++i) { - memcpy(bytes_data + offset, dictionary_[i].ptr, fixed_len); - dictionary_[i].ptr = bytes_data + offset; - offset += fixed_len; - } -} - -// 
---------------------------------------------------------------------- -// Dictionary encoder - -// Initially imported from Apache Impala on 2016-02-22, and has been modified -// since for parquet-cpp - -// Initially 1024 elements -static constexpr int INITIAL_HASH_TABLE_SIZE = 1 << 10; - -typedef int32_t hash_slot_t; -static constexpr hash_slot_t HASH_SLOT_EMPTY = std::numeric_limits<int32_t>::max(); - -// The maximum load factor for the hash table before resizing. -static constexpr double MAX_HASH_LOAD = 0.7; - -/// See the dictionary encoding section of https://github.com/Parquet/parquet-format. -/// The encoding supports streaming encoding. Values are encoded as they are added while -/// the dictionary is being constructed. At any time, the buffered values can be -/// written out with the current dictionary size. More values can then be added to -/// the encoder, including new dictionary entries. -template <typename DType> -class DictEncoder : public Encoder<DType> { - public: - typedef typename DType::c_type T; - - explicit DictEncoder(const ColumnDescriptor* desc, ChunkedAllocator* pool = nullptr, - MemoryAllocator* allocator = default_allocator()) - : Encoder<DType>(desc, Encoding::PLAIN_DICTIONARY, allocator), - allocator_(allocator), - pool_(pool), - hash_table_size_(INITIAL_HASH_TABLE_SIZE), - mod_bitmask_(hash_table_size_ - 1), - hash_slots_(0, allocator), - dict_encoded_size_(0), - type_length_(desc->type_length()) { - hash_slots_.Assign(hash_table_size_, HASH_SLOT_EMPTY); - if (!CpuInfo::initialized()) { CpuInfo::Init(); } - } - - virtual ~DictEncoder() { DCHECK(buffered_indices_.empty()); } - - // TODO(wesm): think about how to address the construction semantics in - // encodings/dictionary-encoding.h - void set_mem_pool(ChunkedAllocator* pool) { pool_ = pool; } - - void set_type_length(int type_length) { type_length_ = type_length; } - - /// Returns a conservative estimate of the number of bytes needed to encode the buffered - /// indices. 
Used to size the buffer passed to WriteIndices(). - int64_t EstimatedDataEncodedSize() override { - // Note: because of the way RleEncoder::CheckBufferFull() is called, we have to - // reserve - // an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't be used - // but not reserving them would cause the encoder to fail. - return 1 + RleEncoder::MaxBufferSize(bit_width(), buffered_indices_.size()) + - RleEncoder::MinBufferSize(bit_width()); - } - - /// The minimum bit width required to encode the currently buffered indices. - int bit_width() const { - if (UNLIKELY(num_entries() == 0)) return 0; - if (UNLIKELY(num_entries() == 1)) return 1; - return BitUtil::Log2(num_entries()); - } - - /// Writes out any buffered indices to buffer preceded by the bit width of this data. - /// Returns the number of bytes written. - /// If the supplied buffer is not big enough, returns -1. - /// buffer must be preallocated with buffer_len bytes. Use EstimatedDataEncodedSize() - /// to size buffer. - int WriteIndices(uint8_t* buffer, int buffer_len); - - int hash_table_size() { return hash_table_size_; } - int dict_encoded_size() { return dict_encoded_size_; } - /// Clears all the indices (but leaves the dictionary). - void ClearIndices() { buffered_indices_.clear(); } - - /// Encode value. Note that this does not actually write any data, just - /// buffers the value's index to be written later. 
- void Put(const T& value); - - std::shared_ptr<Buffer> FlushValues() override { - std::shared_ptr<PoolBuffer> buffer = - AllocateBuffer(this->allocator_, EstimatedDataEncodedSize()); - int result_size = WriteIndices(buffer->mutable_data(), EstimatedDataEncodedSize()); - ClearIndices(); - PARQUET_THROW_NOT_OK(buffer->Resize(result_size)); - return buffer; - }; - - void Put(const T* values, int num_values) override { - for (int i = 0; i < num_values; i++) { - Put(values[i]); - } - } - - void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, - int64_t valid_bits_offset) override { - INIT_BITSET(valid_bits, valid_bits_offset); - for (int32_t i = 0; i < num_values; i++) { - if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); } - READ_NEXT_BITSET(valid_bits); - } - } - - /// Writes out the encoded dictionary to buffer. buffer must be preallocated to - /// dict_encoded_size() bytes. - void WriteDict(uint8_t* buffer); - - ChunkedAllocator* mem_pool() { return pool_; } - - /// The number of entries in the dictionary. - int num_entries() const { return uniques_.size(); } - - private: - MemoryAllocator* allocator_; - - // For ByteArray / FixedLenByteArray data. Not owned - ChunkedAllocator* pool_; - - /// Size of the table. Must be a power of 2. - int hash_table_size_; - - // Store hash_table_size_ - 1, so that j & mod_bitmask_ is equivalent to j % - // hash_table_size_, but uses far fewer CPU cycles - int mod_bitmask_; - - // We use a fixed-size hash table with linear probing - // - // These values correspond to the uniques_ array - Vector<hash_slot_t> hash_slots_; - - /// Indices that have not yet be written out by WriteIndices(). - std::vector<int> buffered_indices_; - - /// The number of bytes needed to encode the dictionary. - int dict_encoded_size_; - - // The unique observed values - std::vector<T> uniques_; - - bool SlotDifferent(const T& v, hash_slot_t slot); - void DoubleTableSize(); - - /// Size of each encoded dictionary value. 
-1 for variable-length types. - int type_length_; - - /// Hash function for mapping a value to a bucket. - inline int Hash(const T& value) const; - - /// Adds value to the hash table and updates dict_encoded_size_ - void AddDictKey(const T& value); -}; - -template <typename DType> -inline int DictEncoder<DType>::Hash(const typename DType::c_type& value) const { - return HashUtil::Hash(&value, sizeof(value), 0); -} - -template <> -inline int DictEncoder<ByteArrayType>::Hash(const ByteArray& value) const { - if (value.len > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; } - return HashUtil::Hash(value.ptr, value.len, 0); -} - -template <> -inline int DictEncoder<FLBAType>::Hash(const FixedLenByteArray& value) const { - if (type_length_ > 0) { DCHECK(nullptr != value.ptr) << "Value ptr cannot be NULL"; } - return HashUtil::Hash(value.ptr, type_length_, 0); -} - -template <typename DType> -inline bool DictEncoder<DType>::SlotDifferent( - const typename DType::c_type& v, hash_slot_t slot) { - return v != uniques_[slot]; -} - -template <> -inline bool DictEncoder<FLBAType>::SlotDifferent( - const FixedLenByteArray& v, hash_slot_t slot) { - return 0 != memcmp(v.ptr, uniques_[slot].ptr, type_length_); -} - -template <typename DType> -inline void DictEncoder<DType>::Put(const typename DType::c_type& v) { - int j = Hash(v) & mod_bitmask_; - hash_slot_t index = hash_slots_[j]; - - // Find an empty slot - while (HASH_SLOT_EMPTY != index && SlotDifferent(v, index)) { - // Linear probing - ++j; - if (j == hash_table_size_) j = 0; - index = hash_slots_[j]; - } - - if (index == HASH_SLOT_EMPTY) { - // Not in the hash table, so we insert it now - index = uniques_.size(); - hash_slots_[j] = index; - AddDictKey(v); - - if (UNLIKELY(static_cast<int>(uniques_.size()) > hash_table_size_ * MAX_HASH_LOAD)) { - DoubleTableSize(); - } - } - - buffered_indices_.push_back(index); -} - -template <typename DType> -inline void DictEncoder<DType>::DoubleTableSize() { - int 
new_size = hash_table_size_ * 2; - Vector<hash_slot_t> new_hash_slots(0, allocator_); - new_hash_slots.Assign(new_size, HASH_SLOT_EMPTY); - hash_slot_t index, slot; - int j; - for (int i = 0; i < hash_table_size_; ++i) { - index = hash_slots_[i]; - - if (index == HASH_SLOT_EMPTY) { continue; } - - // Compute the hash value mod the new table size to start looking for an - // empty slot - const typename DType::c_type& v = uniques_[index]; - - // Find an empty slot in the new hash table - j = Hash(v) & (new_size - 1); - slot = new_hash_slots[j]; - while (HASH_SLOT_EMPTY != slot && SlotDifferent(v, slot)) { - ++j; - if (j == new_size) j = 0; - slot = new_hash_slots[j]; - } - - // Copy the old slot index to the new hash table - new_hash_slots[j] = index; - } - - hash_table_size_ = new_size; - mod_bitmask_ = new_size - 1; - - hash_slots_.Swap(new_hash_slots); -} - -template <typename DType> -inline void DictEncoder<DType>::AddDictKey(const typename DType::c_type& v) { - uniques_.push_back(v); - dict_encoded_size_ += sizeof(typename DType::c_type); -} - -template <> -inline void DictEncoder<ByteArrayType>::AddDictKey(const ByteArray& v) { - uint8_t* heap = pool_->Allocate(v.len); - if (UNLIKELY(v.len > 0 && heap == nullptr)) { throw ParquetException("out of memory"); } - memcpy(heap, v.ptr, v.len); - uniques_.push_back(ByteArray(v.len, heap)); - dict_encoded_size_ += v.len + sizeof(uint32_t); -} - -template <> -inline void DictEncoder<FLBAType>::AddDictKey(const FixedLenByteArray& v) { - uint8_t* heap = pool_->Allocate(type_length_); - if (UNLIKELY(type_length_ > 0 && heap == nullptr)) { - throw ParquetException("out of memory"); - } - memcpy(heap, v.ptr, type_length_); - - uniques_.push_back(FixedLenByteArray(heap)); - dict_encoded_size_ += type_length_; -} - -template <typename DType> -inline void DictEncoder<DType>::WriteDict(uint8_t* buffer) { - // For primitive types, only a memcpy - memcpy(buffer, uniques_.data(), sizeof(typename DType::c_type) * uniques_.size()); 
-} - -template <> -inline void DictEncoder<BooleanType>::WriteDict(uint8_t* buffer) { - // For primitive types, only a memcpy - // memcpy(buffer, uniques_.data(), sizeof(typename DType::c_type) * uniques_.size()); - for (size_t i = 0; i < uniques_.size(); i++) { - buffer[i] = uniques_[i]; - } -} - -// ByteArray and FLBA already have the dictionary encoded in their data heaps -template <> -inline void DictEncoder<ByteArrayType>::WriteDict(uint8_t* buffer) { - for (const ByteArray& v : uniques_) { - memcpy(buffer, reinterpret_cast<const void*>(&v.len), sizeof(uint32_t)); - buffer += sizeof(uint32_t); - if (v.len > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; } - memcpy(buffer, v.ptr, v.len); - buffer += v.len; - } -} - -template <> -inline void DictEncoder<FLBAType>::WriteDict(uint8_t* buffer) { - for (const FixedLenByteArray& v : uniques_) { - if (type_length_ > 0) { DCHECK(nullptr != v.ptr) << "Value ptr cannot be NULL"; } - memcpy(buffer, v.ptr, type_length_); - buffer += type_length_; - } -} - -template <typename DType> -inline int DictEncoder<DType>::WriteIndices(uint8_t* buffer, int buffer_len) { - // Write bit width in first byte - *buffer = bit_width(); - ++buffer; - --buffer_len; - - RleEncoder encoder(buffer, buffer_len, bit_width()); - for (int index : buffered_indices_) { - if (!encoder.Put(index)) return -1; - } - encoder.Flush(); - - ClearIndices(); - return 1 + encoder.len(); -} - -} // namespace parquet - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/encodings/encoder.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoder.h b/src/parquet/encodings/encoder.h deleted file mode 100644 index 1c06574..0000000 --- a/src/parquet/encodings/encoder.h +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef PARQUET_ENCODINGS_ENCODER_H -#define PARQUET_ENCODINGS_ENCODER_H - -#include <cstdint> -#include <memory> - -#include "parquet/exception.h" -#include "parquet/types.h" -#include "parquet/util/bit-util.h" -#include "parquet/util/memory.h" - -namespace parquet { - -class ColumnDescriptor; - -// Base class for value encoders. Since encoders may or may not have state (e.g., -// dictionary encoding) we use a class instance to maintain any state. 
-// -// TODO(wesm): Encode interface API is temporary -template <typename DType> -class Encoder { - public: - typedef typename DType::c_type T; - - virtual ~Encoder() {} - - virtual int64_t EstimatedDataEncodedSize() = 0; - virtual std::shared_ptr<Buffer> FlushValues() = 0; - virtual void Put(const T* src, int num_values) = 0; - virtual void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, - int64_t valid_bits_offset) { - PoolBuffer buffer(allocator_); - buffer.Resize(num_values * sizeof(T)); - int32_t num_valid_values = 0; - INIT_BITSET(valid_bits, valid_bits_offset); - T* data = reinterpret_cast<T*>(buffer.mutable_data()); - for (int32_t i = 0; i < num_values; i++) { - if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { - data[num_valid_values++] = src[i]; - } - READ_NEXT_BITSET(valid_bits); - } - Put(data, num_valid_values); - } - - const Encoding::type encoding() const { return encoding_; } - - protected: - explicit Encoder(const ColumnDescriptor* descr, const Encoding::type& encoding, - MemoryAllocator* allocator) - : descr_(descr), encoding_(encoding), allocator_(allocator) {} - - // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY - const ColumnDescriptor* descr_; - const Encoding::type encoding_; - MemoryAllocator* allocator_; -}; - -} // namespace parquet - -#endif // PARQUET_ENCODINGS_ENCODER_H http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/encodings/encoding-benchmark.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoding-benchmark.cc b/src/parquet/encodings/encoding-benchmark.cc deleted file mode 100644 index 516e453..0000000 --- a/src/parquet/encodings/encoding-benchmark.cc +++ /dev/null @@ -1,161 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "benchmark/benchmark.h" - -#include "parquet/encodings/dictionary-encoding.h" -#include "parquet/file/reader-internal.h" -#include "parquet/util/memory.h" - -namespace parquet { - -using format::ColumnChunk; -using schema::PrimitiveNode; - -namespace benchmark { - -std::shared_ptr<ColumnDescriptor> Int64Schema(Repetition::type repetition) { - auto node = PrimitiveNode::Make("int64", repetition, Type::INT64); - return std::make_shared<ColumnDescriptor>( - node, repetition != Repetition::REQUIRED, repetition == Repetition::REPEATED); -} - -static void BM_PlainEncodingBoolean(::benchmark::State& state) { - std::vector<bool> values(state.range_x(), 64); - PlainEncoder<BooleanType> encoder(nullptr); - - while (state.KeepRunning()) { - encoder.Put(values, values.size()); - encoder.FlushValues(); - } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool)); -} - -BENCHMARK(BM_PlainEncodingBoolean)->Range(1024, 65536); - -static void BM_PlainDecodingBoolean(::benchmark::State& state) { - std::vector<bool> values(state.range_x(), 64); - bool* output = new bool[state.range_x()]; - PlainEncoder<BooleanType> encoder(nullptr); - encoder.Put(values, values.size()); - std::shared_ptr<Buffer> buf = encoder.FlushValues(); - - while (state.KeepRunning()) { - PlainDecoder<BooleanType> decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - 
decoder.Decode(output, values.size()); - } - - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(bool)); - delete[] output; -} - -BENCHMARK(BM_PlainDecodingBoolean)->Range(1024, 65536); - -static void BM_PlainEncodingInt64(::benchmark::State& state) { - std::vector<int64_t> values(state.range_x(), 64); - PlainEncoder<Int64Type> encoder(nullptr); - - while (state.KeepRunning()) { - encoder.Put(values.data(), values.size()); - encoder.FlushValues(); - } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t)); -} - -BENCHMARK(BM_PlainEncodingInt64)->Range(1024, 65536); - -static void BM_PlainDecodingInt64(::benchmark::State& state) { - std::vector<int64_t> values(state.range_x(), 64); - PlainEncoder<Int64Type> encoder(nullptr); - encoder.Put(values.data(), values.size()); - std::shared_ptr<Buffer> buf = encoder.FlushValues(); - - while (state.KeepRunning()) { - PlainDecoder<Int64Type> decoder(nullptr); - decoder.SetData(values.size(), buf->data(), buf->size()); - decoder.Decode(values.data(), values.size()); - } - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(int64_t)); -} - -BENCHMARK(BM_PlainDecodingInt64)->Range(1024, 65536); - -template <typename Type> -static void DecodeDict( - std::vector<typename Type::c_type>& values, ::benchmark::State& state) { - typedef typename Type::c_type T; - int num_values = values.size(); - - ChunkedAllocator pool; - MemoryAllocator* allocator = default_allocator(); - std::shared_ptr<ColumnDescriptor> descr = Int64Schema(Repetition::REQUIRED); - - DictEncoder<Type> encoder(descr.get(), &pool, allocator); - for (int i = 0; i < num_values; ++i) { - encoder.Put(values[i]); - } - - std::shared_ptr<PoolBuffer> dict_buffer = - AllocateBuffer(allocator, encoder.dict_encoded_size()); - - std::shared_ptr<PoolBuffer> indices = - AllocateBuffer(allocator, encoder.EstimatedDataEncodedSize()); - - encoder.WriteDict(dict_buffer->mutable_data()); - int actual_bytes = 
encoder.WriteIndices(indices->mutable_data(), indices->size()); - - PARQUET_THROW_NOT_OK(indices->Resize(actual_bytes)); - - while (state.KeepRunning()) { - PlainDecoder<Type> dict_decoder(descr.get()); - dict_decoder.SetData(encoder.num_entries(), dict_buffer->data(), dict_buffer->size()); - DictionaryDecoder<Type> decoder(descr.get()); - decoder.SetDict(&dict_decoder); - decoder.SetData(num_values, indices->data(), indices->size()); - decoder.Decode(values.data(), num_values); - } - - state.SetBytesProcessed(state.iterations() * state.range_x() * sizeof(T)); -} - -static void BM_DictDecodingInt64_repeats(::benchmark::State& state) { - typedef Int64Type Type; - typedef typename Type::c_type T; - - std::vector<T> values(state.range_x(), 64); - DecodeDict<Type>(values, state); -} - -BENCHMARK(BM_DictDecodingInt64_repeats)->Range(1024, 65536); - -static void BM_DictDecodingInt64_literals(::benchmark::State& state) { - typedef Int64Type Type; - typedef typename Type::c_type T; - - std::vector<T> values(state.range_x()); - for (size_t i = 0; i < values.size(); ++i) { - values[i] = i; - } - DecodeDict<Type>(values, state); -} - -BENCHMARK(BM_DictDecodingInt64_literals)->Range(1024, 65536); - -} // namespace benchmark - -} // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/encodings/encoding-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/encoding-test.cc b/src/parquet/encodings/encoding-test.cc deleted file mode 100644 index 1e9894d..0000000 --- a/src/parquet/encodings/encoding-test.cc +++ /dev/null @@ -1,307 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <cstdint> -#include <cstdlib> -#include <cstring> -#include <gtest/gtest.h> -#include <string> -#include <vector> - -#include "parquet/encodings/dictionary-encoding.h" -#include "parquet/encodings/plain-encoding.h" -#include "parquet/schema.h" -#include "parquet/types.h" -#include "parquet/util/bit-util.h" -#include "parquet/util/memory.h" -#include "parquet/util/test-common.h" - -using std::string; -using std::vector; - -namespace parquet { - -namespace test { - -TEST(VectorBooleanTest, TestEncodeDecode) { - // PARQUET-454 - int nvalues = 10000; - int nbytes = BitUtil::Ceil(nvalues, 8); - - // seed the prng so failure is deterministic - vector<bool> draws = flip_coins_seed(nvalues, 0.5, 0); - - PlainEncoder<BooleanType> encoder(nullptr); - PlainDecoder<BooleanType> decoder(nullptr); - - encoder.Put(draws, nvalues); - - std::shared_ptr<Buffer> encode_buffer = encoder.FlushValues(); - ASSERT_EQ(nbytes, encode_buffer->size()); - - vector<uint8_t> decode_buffer(nbytes); - const uint8_t* decode_data = &decode_buffer[0]; - - decoder.SetData(nvalues, encode_buffer->data(), encode_buffer->size()); - int values_decoded = decoder.Decode(&decode_buffer[0], nvalues); - ASSERT_EQ(nvalues, values_decoded); - - for (int i = 0; i < nvalues; ++i) { - ASSERT_EQ(draws[i], BitUtil::GetArrayBit(decode_data, i)) << i; - } -} - -// 
---------------------------------------------------------------------- -// test data generation - -template <typename T> -void GenerateData(int num_values, T* out, vector<uint8_t>* heap) { - // seed the prng so failure is deterministic - random_numbers( - num_values, 0, std::numeric_limits<T>::min(), std::numeric_limits<T>::max(), out); -} - -template <> -void GenerateData<bool>(int num_values, bool* out, vector<uint8_t>* heap) { - // seed the prng so failure is deterministic - random_bools(num_values, 0.5, 0, out); -} - -template <> -void GenerateData<Int96>(int num_values, Int96* out, vector<uint8_t>* heap) { - // seed the prng so failure is deterministic - random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(), - std::numeric_limits<int32_t>::max(), out); -} - -template <> -void GenerateData<ByteArray>(int num_values, ByteArray* out, vector<uint8_t>* heap) { - // seed the prng so failure is deterministic - int max_byte_array_len = 12; - heap->resize(num_values * max_byte_array_len); - random_byte_array(num_values, 0, heap->data(), out, 2, max_byte_array_len); -} - -static int flba_length = 8; - -template <> -void GenerateData<FLBA>(int num_values, FLBA* out, vector<uint8_t>* heap) { - // seed the prng so failure is deterministic - heap->resize(num_values * flba_length); - random_fixed_byte_array(num_values, 0, heap->data(), flba_length, out); -} - -template <typename T> -void VerifyResults(T* result, T* expected, int num_values) { - for (int i = 0; i < num_values; ++i) { - ASSERT_EQ(expected[i], result[i]) << i; - } -} - -template <> -void VerifyResults<FLBA>(FLBA* result, FLBA* expected, int num_values) { - for (int i = 0; i < num_values; ++i) { - ASSERT_EQ(0, memcmp(expected[i].ptr, result[i].ptr, flba_length)) << i; - } -} - -// ---------------------------------------------------------------------- -// Create some column descriptors - -template <typename DType> -std::shared_ptr<ColumnDescriptor> ExampleDescr() { - auto node = 
schema::PrimitiveNode::Make("name", Repetition::OPTIONAL, DType::type_num); - return std::make_shared<ColumnDescriptor>(node, 0, 0); -} - -template <> -std::shared_ptr<ColumnDescriptor> ExampleDescr<FLBAType>() { - auto node = schema::PrimitiveNode::Make("name", Repetition::OPTIONAL, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, flba_length, 10, 2); - return std::make_shared<ColumnDescriptor>(node, 0, 0); -} - -// ---------------------------------------------------------------------- -// Plain encoding tests - -template <typename Type> -class TestEncodingBase : public ::testing::Test { - public: - typedef typename Type::c_type T; - static constexpr int TYPE = Type::type_num; - - void SetUp() { - descr_ = ExampleDescr<Type>(); - type_length_ = descr_->type_length(); - allocator_ = default_allocator(); - } - - void TearDown() { pool_.FreeAll(); } - - void InitData(int nvalues, int repeats) { - num_values_ = nvalues * repeats; - input_bytes_.resize(num_values_ * sizeof(T)); - output_bytes_.resize(num_values_ * sizeof(T)); - draws_ = reinterpret_cast<T*>(input_bytes_.data()); - decode_buf_ = reinterpret_cast<T*>(output_bytes_.data()); - GenerateData<T>(nvalues, draws_, &data_buffer_); - - // add some repeated values - for (int j = 1; j < repeats; ++j) { - for (int i = 0; i < nvalues; ++i) { - draws_[nvalues * j + i] = draws_[i]; - } - } - } - - virtual void CheckRoundtrip() = 0; - - void Execute(int nvalues, int repeats) { - InitData(nvalues, repeats); - CheckRoundtrip(); - } - - protected: - ChunkedAllocator pool_; - MemoryAllocator* allocator_; - - int num_values_; - int type_length_; - T* draws_; - T* decode_buf_; - vector<uint8_t> input_bytes_; - vector<uint8_t> output_bytes_; - vector<uint8_t> data_buffer_; - - std::shared_ptr<Buffer> encode_buffer_; - std::shared_ptr<ColumnDescriptor> descr_; -}; - -// Member variables are not visible to templated subclasses. 
Possibly figure -// out an alternative to this class layering at some point -#define USING_BASE_MEMBERS() \ - using TestEncodingBase<Type>::pool_; \ - using TestEncodingBase<Type>::allocator_; \ - using TestEncodingBase<Type>::descr_; \ - using TestEncodingBase<Type>::num_values_; \ - using TestEncodingBase<Type>::draws_; \ - using TestEncodingBase<Type>::data_buffer_; \ - using TestEncodingBase<Type>::type_length_; \ - using TestEncodingBase<Type>::encode_buffer_; \ - using TestEncodingBase<Type>::decode_buf_; - -template <typename Type> -class TestPlainEncoding : public TestEncodingBase<Type> { - public: - typedef typename Type::c_type T; - static constexpr int TYPE = Type::type_num; - - virtual void CheckRoundtrip() { - PlainEncoder<Type> encoder(descr_.get()); - PlainDecoder<Type> decoder(descr_.get()); - encoder.Put(draws_, num_values_); - encode_buffer_ = encoder.FlushValues(); - - decoder.SetData(num_values_, encode_buffer_->data(), encode_buffer_->size()); - int values_decoded = decoder.Decode(decode_buf_, num_values_); - ASSERT_EQ(num_values_, values_decoded); - VerifyResults<T>(decode_buf_, draws_, num_values_); - } - - protected: - USING_BASE_MEMBERS(); -}; - -TYPED_TEST_CASE(TestPlainEncoding, ParquetTypes); - -TYPED_TEST(TestPlainEncoding, BasicRoundTrip) { - this->Execute(10000, 1); -} - -// ---------------------------------------------------------------------- -// Dictionary encoding tests - -typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType, - ByteArrayType, FLBAType> - DictEncodedTypes; - -template <typename Type> -class TestDictionaryEncoding : public TestEncodingBase<Type> { - public: - typedef typename Type::c_type T; - static constexpr int TYPE = Type::type_num; - - void CheckRoundtrip() { - std::vector<uint8_t> valid_bits(BitUtil::RoundUpNumBytes(num_values_) + 1, 255); - DictEncoder<Type> encoder(descr_.get(), &pool_); - - ASSERT_NO_THROW(encoder.Put(draws_, num_values_)); - dict_buffer_ = 
AllocateBuffer(default_allocator(), encoder.dict_encoded_size()); - encoder.WriteDict(dict_buffer_->mutable_data()); - std::shared_ptr<Buffer> indices = encoder.FlushValues(); - - DictEncoder<Type> spaced_encoder(descr_.get(), &pool_); - // PutSpaced should lead to the same results - ASSERT_NO_THROW(spaced_encoder.PutSpaced(draws_, num_values_, valid_bits.data(), 0)); - std::shared_ptr<Buffer> indices_from_spaced = spaced_encoder.FlushValues(); - ASSERT_TRUE(indices_from_spaced->Equals(*indices)); - - PlainDecoder<Type> dict_decoder(descr_.get()); - dict_decoder.SetData( - encoder.num_entries(), dict_buffer_->data(), dict_buffer_->size()); - - DictionaryDecoder<Type> decoder(descr_.get()); - decoder.SetDict(&dict_decoder); - - decoder.SetData(num_values_, indices->data(), indices->size()); - int values_decoded = decoder.Decode(decode_buf_, num_values_); - ASSERT_EQ(num_values_, values_decoded); - - // TODO(wesm): The DictionaryDecoder must stay alive because the decoded - // values' data is owned by a buffer inside the DictionaryEncoder. We - // should revisit when data lifetime is reviewed more generally. 
- VerifyResults<T>(decode_buf_, draws_, num_values_); - - // Also test spaced decoding - decoder.SetData(num_values_, indices->data(), indices->size()); - values_decoded = - decoder.DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0); - ASSERT_EQ(num_values_, values_decoded); - VerifyResults<T>(decode_buf_, draws_, num_values_); - } - - protected: - USING_BASE_MEMBERS(); - std::shared_ptr<PoolBuffer> dict_buffer_; -}; - -TYPED_TEST_CASE(TestDictionaryEncoding, DictEncodedTypes); - -TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) { - this->Execute(2500, 2); -} - -TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) { - PlainDecoder<BooleanType> dict_decoder(nullptr); - DictionaryDecoder<BooleanType> decoder(nullptr); - - ASSERT_THROW(decoder.SetDict(&dict_decoder), ParquetException); -} - -} // namespace test - -} // namespace parquet http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/encodings/plain-encoding.h ---------------------------------------------------------------------- diff --git a/src/parquet/encodings/plain-encoding.h b/src/parquet/encodings/plain-encoding.h deleted file mode 100644 index 5e7e269..0000000 --- a/src/parquet/encodings/plain-encoding.h +++ /dev/null @@ -1,290 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef PARQUET_PLAIN_ENCODING_H -#define PARQUET_PLAIN_ENCODING_H - -#include <algorithm> -#include <vector> - -#include "parquet/encodings/decoder.h" -#include "parquet/encodings/encoder.h" -#include "parquet/schema.h" -#include "parquet/util/bit-stream-utils.inline.h" -#include "parquet/util/memory.h" - -namespace parquet { - -// ---------------------------------------------------------------------- -// Encoding::PLAIN decoder implementation - -template <typename DType> -class PlainDecoder : public Decoder<DType> { - public: - typedef typename DType::c_type T; - using Decoder<DType>::num_values_; - - explicit PlainDecoder(const ColumnDescriptor* descr) - : Decoder<DType>(descr, Encoding::PLAIN), data_(NULL), len_(0) { - if (descr_ && descr_->physical_type() == Type::FIXED_LEN_BYTE_ARRAY) { - type_length_ = descr_->type_length(); - } else { - type_length_ = -1; - } - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - data_ = data; - len_ = len; - } - - virtual int Decode(T* buffer, int max_values); - - private: - using Decoder<DType>::descr_; - const uint8_t* data_; - int len_; - int type_length_; -}; - -// Decode routine templated on C++ type rather than type enum -template <typename T> -inline int DecodePlain( - const uint8_t* data, int64_t data_size, int num_values, int type_length, T* out) { - int bytes_to_decode = num_values * sizeof(T); - if (data_size < bytes_to_decode) { ParquetException::EofException(); } - memcpy(out, data, bytes_to_decode); - return bytes_to_decode; -} - -// Template specialization for BYTE_ARRAY. The written values do not own their -// own data. 
-template <> -inline int DecodePlain<ByteArray>(const uint8_t* data, int64_t data_size, int num_values, - int type_length, ByteArray* out) { - int bytes_decoded = 0; - int increment; - for (int i = 0; i < num_values; ++i) { - uint32_t len = out[i].len = *reinterpret_cast<const uint32_t*>(data); - increment = sizeof(uint32_t) + len; - if (data_size < increment) ParquetException::EofException(); - out[i].ptr = data + sizeof(uint32_t); - data += increment; - data_size -= increment; - bytes_decoded += increment; - } - return bytes_decoded; -} - -// Template specialization for FIXED_LEN_BYTE_ARRAY. The written values do not -// own their own data. -template <> -inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size, - int num_values, int type_length, FixedLenByteArray* out) { - int bytes_to_decode = type_length * num_values; - if (data_size < bytes_to_decode) { ParquetException::EofException(); } - for (int i = 0; i < num_values; ++i) { - out[i].ptr = data; - data += type_length; - data_size -= type_length; - } - return bytes_to_decode; -} - -template <typename DType> -inline int PlainDecoder<DType>::Decode(T* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - int bytes_consumed = DecodePlain<T>(data_, len_, max_values, type_length_, buffer); - data_ += bytes_consumed; - len_ -= bytes_consumed; - num_values_ -= max_values; - return max_values; -} - -template <> -class PlainDecoder<BooleanType> : public Decoder<BooleanType> { - public: - explicit PlainDecoder(const ColumnDescriptor* descr) - : Decoder<BooleanType>(descr, Encoding::PLAIN) {} - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - bit_reader_ = BitReader(data, len); - } - - // Two flavors of bool decoding - int Decode(uint8_t* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - bool val; - for (int i = 0; i < max_values; ++i) { - if (!bit_reader_.GetValue(1, &val)) { 
ParquetException::EofException(); } - BitUtil::SetArrayBit(buffer, i, val); - } - num_values_ -= max_values; - return max_values; - } - - virtual int Decode(bool* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - if (bit_reader_.GetBatch(1, buffer, max_values) != max_values) { - ParquetException::EofException(); - } - num_values_ -= max_values; - return max_values; - } - - private: - BitReader bit_reader_; -}; - -// ---------------------------------------------------------------------- -// Encoding::PLAIN encoder implementation - -template <typename DType> -class PlainEncoder : public Encoder<DType> { - public: - typedef typename DType::c_type T; - - explicit PlainEncoder( - const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) - : Encoder<DType>(descr, Encoding::PLAIN, allocator) { - values_sink_.reset(new InMemoryOutputStream(allocator)); - } - - int64_t EstimatedDataEncodedSize() override { return values_sink_->Tell(); } - - std::shared_ptr<Buffer> FlushValues() override; - void Put(const T* src, int num_values) override; - - protected: - std::unique_ptr<InMemoryOutputStream> values_sink_; -}; - -template <> -class PlainEncoder<BooleanType> : public Encoder<BooleanType> { - public: - explicit PlainEncoder( - const ColumnDescriptor* descr, MemoryAllocator* allocator = default_allocator()) - : Encoder<BooleanType>(descr, Encoding::PLAIN, allocator), - bits_available_(kInMemoryDefaultCapacity * 8), - bits_buffer_(AllocateBuffer(allocator, kInMemoryDefaultCapacity)), - values_sink_(new InMemoryOutputStream(allocator)) { - bit_writer_.reset(new BitWriter(bits_buffer_->mutable_data(), bits_buffer_->size())); - } - - int64_t EstimatedDataEncodedSize() override { - return values_sink_->Tell() + bit_writer_->bytes_written(); - } - - std::shared_ptr<Buffer> FlushValues() override { - if (bits_available_ > 0) { - bit_writer_->Flush(); - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); - 
bit_writer_->Clear(); - bits_available_ = bits_buffer_->size() * 8; - } - - std::shared_ptr<Buffer> buffer = values_sink_->GetBuffer(); - values_sink_.reset(new InMemoryOutputStream(this->allocator_)); - return buffer; - } - -#define PLAINDECODER_BOOLEAN_PUT(input_type, function_attributes) \ - void Put(input_type src, int num_values) function_attributes { \ - int bit_offset = 0; \ - if (bits_available_ > 0) { \ - int bits_to_write = std::min(bits_available_, num_values); \ - for (int i = 0; i < bits_to_write; i++) { \ - bit_writer_->PutValue(src[i], 1); \ - } \ - bits_available_ -= bits_to_write; \ - bit_offset = bits_to_write; \ - \ - if (bits_available_ == 0) { \ - bit_writer_->Flush(); \ - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); \ - bit_writer_->Clear(); \ - } \ - } \ - \ - int bits_remaining = num_values - bit_offset; \ - while (bit_offset < num_values) { \ - bits_available_ = bits_buffer_->size() * 8; \ - \ - int bits_to_write = std::min(bits_available_, bits_remaining); \ - for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { \ - bit_writer_->PutValue(src[i], 1); \ - } \ - bit_offset += bits_to_write; \ - bits_available_ -= bits_to_write; \ - bits_remaining -= bits_to_write; \ - \ - if (bits_available_ == 0) { \ - bit_writer_->Flush(); \ - values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); \ - bit_writer_->Clear(); \ - } \ - } \ - } - - PLAINDECODER_BOOLEAN_PUT(const bool*, override) - PLAINDECODER_BOOLEAN_PUT(const std::vector<bool>&, ) - - protected: - int bits_available_; - std::unique_ptr<BitWriter> bit_writer_; - std::shared_ptr<PoolBuffer> bits_buffer_; - std::unique_ptr<InMemoryOutputStream> values_sink_; -}; - -template <typename DType> -inline std::shared_ptr<Buffer> PlainEncoder<DType>::FlushValues() { - std::shared_ptr<Buffer> buffer = values_sink_->GetBuffer(); - values_sink_.reset(new InMemoryOutputStream(this->allocator_)); - return buffer; -} - -template <typename DType> 
-inline void PlainEncoder<DType>::Put(const T* buffer, int num_values) { - values_sink_->Write(reinterpret_cast<const uint8_t*>(buffer), num_values * sizeof(T)); -} - -template <> -inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_values) { - for (int i = 0; i < num_values; ++i) { - // Write the result to the output stream - values_sink_->Write(reinterpret_cast<const uint8_t*>(&src[i].len), sizeof(uint32_t)); - if (src[i].len > 0) { DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; } - values_sink_->Write(reinterpret_cast<const uint8_t*>(src[i].ptr), src[i].len); - } -} - -template <> -inline void PlainEncoder<FLBAType>::Put(const FixedLenByteArray* src, int num_values) { - for (int i = 0; i < num_values; ++i) { - // Write the result to the output stream - if (descr_->type_length() > 0) { - DCHECK(nullptr != src[i].ptr) << "Value ptr cannot be NULL"; - } - values_sink_->Write( - reinterpret_cast<const uint8_t*>(src[i].ptr), descr_->type_length()); - } -} -} // namespace parquet - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/file/file-serialize-test.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/file-serialize-test.cc b/src/parquet/file/file-serialize-test.cc index 7a90eeb..71dd5c4 100644 --- a/src/parquet/file/file-serialize-test.cc +++ b/src/parquet/file/file-serialize-test.cc @@ -106,8 +106,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> { }; typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType, - BooleanType, ByteArrayType, FLBAType> - TestTypes; + BooleanType, ByteArrayType, FLBAType> TestTypes; TYPED_TEST_CASE(TestSerialize, TestTypes); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/thrift.h ---------------------------------------------------------------------- diff --git a/src/parquet/thrift.h b/src/parquet/thrift.h index 7fa0de3..aafd3f5 100644 --- 
a/src/parquet/thrift.h +++ b/src/parquet/thrift.h @@ -99,8 +99,7 @@ inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deseriali boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> tmem_transport( new apache::thrift::transport::TMemoryBuffer(const_cast<uint8_t*>(buf), *len)); apache::thrift::protocol::TCompactProtocolFactoryT< - apache::thrift::transport::TMemoryBuffer> - tproto_factory; + apache::thrift::transport::TMemoryBuffer> tproto_factory; boost::shared_ptr<apache::thrift::protocol::TProtocol> tproto = tproto_factory.getProtocol(tmem_transport); try { @@ -122,8 +121,7 @@ inline int64_t SerializeThriftMsg(T* obj, uint32_t len, OutputStream* out) { boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> mem_buffer( new apache::thrift::transport::TMemoryBuffer(len)); apache::thrift::protocol::TCompactProtocolFactoryT< - apache::thrift::transport::TMemoryBuffer> - tproto_factory; + apache::thrift::transport::TMemoryBuffer> tproto_factory; boost::shared_ptr<apache::thrift::protocol::TProtocol> tproto = tproto_factory.getProtocol(mem_buffer); try { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/782049ba/src/parquet/util/test-common.h ---------------------------------------------------------------------- diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h index 2327aeb..edadb53 100644 --- a/src/parquet/util/test-common.h +++ b/src/parquet/util/test-common.h @@ -32,8 +32,7 @@ namespace parquet { namespace test { typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType, - DoubleType, ByteArrayType, FLBAType> - ParquetTypes; + DoubleType, ByteArrayType, FLBAType> ParquetTypes; template <typename T> static inline void assert_vector_equal(const vector<T>& left, const vector<T>& right) {
