This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d4fe01  PARQUET-1583: [C++] Remove superfluous parquet::Vector class
2d4fe01 is described below

commit 2d4fe0153cd3edec6ddc8ca742d05c8e3a98f768
Author: Wes McKinney <[email protected]>
AuthorDate: Tue May 21 08:38:09 2019 +0200

    PARQUET-1583: [C++] Remove superfluous parquet::Vector class
    
    This class, which is a syntactic sugar layer on `arrow::ResizableBuffer`,
    was only being used in one place: dictionary decoding. I don't think we
    need to maintain it, and its explicitly instantiated template symbols are
    also a possible source of symbol issues.
    
    Author: Wes McKinney <[email protected]>
    
    Closes #4354 from wesm/PARQUET-1583 and squashes the following commits:
    
    24535175 <Wes McKinney> lint
    29af0799 <Wes McKinney> Remove superfluous parquet::Vector class
---
 cpp/src/parquet/encoding.cc    | 55 +++++++++++++++++++++++++-----------------
 cpp/src/parquet/util/memory.cc | 50 --------------------------------------
 cpp/src/parquet/util/memory.h  | 22 -----------------
 3 files changed, 33 insertions(+), 94 deletions(-)

diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 3a4b342..ebb7aea 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -781,7 +781,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public 
DictDecoder<Type> {
   explicit DictDecoderImpl(const ColumnDescriptor* descr,
                            ::arrow::MemoryPool* pool = 
::arrow::default_memory_pool())
       : DecoderImpl(descr, Encoding::RLE_DICTIONARY),
-        dictionary_(0, pool),
+        dictionary_(AllocateBuffer(pool, 0)),
         byte_array_data_(AllocateBuffer(pool, 0)) {}
 
   // Perform type-specific initiatialization
@@ -798,8 +798,8 @@ class DictDecoderImpl : public DecoderImpl, virtual public 
DictDecoder<Type> {
 
   int Decode(T* buffer, int max_values) override {
     max_values = std::min(max_values, num_values_);
-    int decoded_values =
-        idx_decoder_.GetBatchWithDict(dictionary_.data(), buffer, max_values);
+    int decoded_values = idx_decoder_.GetBatchWithDict(
+        reinterpret_cast<const T*>(dictionary_->data()), buffer, max_values);
     if (decoded_values != max_values) {
       ParquetException::EofException();
     }
@@ -809,9 +809,9 @@ class DictDecoderImpl : public DecoderImpl, virtual public 
DictDecoder<Type> {
 
   int DecodeSpaced(T* buffer, int num_values, int null_count, const uint8_t* 
valid_bits,
                    int64_t valid_bits_offset) override {
-    int decoded_values =
-        idx_decoder_.GetBatchWithDictSpaced(dictionary_.data(), buffer, 
num_values,
-                                            null_count, valid_bits, 
valid_bits_offset);
+    int decoded_values = idx_decoder_.GetBatchWithDictSpaced(
+        reinterpret_cast<const T*>(dictionary_->data()), buffer, num_values, 
null_count,
+        valid_bits, valid_bits_offset);
     if (decoded_values != num_values) {
       ParquetException::EofException();
     }
@@ -819,8 +819,15 @@ class DictDecoderImpl : public DecoderImpl, virtual public 
DictDecoder<Type> {
   }
 
  protected:
+  inline void DecodeDict(TypedDecoder<Type>* dictionary) {
+    int num_dictionary_values = dictionary->values_left();
+    PARQUET_THROW_NOT_OK(dictionary_->Resize(num_dictionary_values * 
sizeof(T)));
+    dictionary->Decode(reinterpret_cast<T*>(dictionary_->mutable_data()),
+                       num_dictionary_values);
+  }
+
   // Only one is set.
-  Vector<T> dictionary_;
+  std::shared_ptr<ResizableBuffer> dictionary_;
 
   // Data that contains the byte array data (byte_array_dictionary_ just has 
the
   // pointers).
@@ -831,9 +838,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public 
DictDecoder<Type> {
 
 template <typename Type>
 inline void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* dictionary) {
-  int num_dictionary_values = dictionary->values_left();
-  dictionary_.Resize(num_dictionary_values);
-  dictionary->Decode(dictionary_.data(), num_dictionary_values);
+  DecodeDict(dictionary);
 }
 
 template <>
@@ -845,12 +850,13 @@ template <>
 inline void DictDecoderImpl<ByteArrayType>::SetDict(
     TypedDecoder<ByteArrayType>* dictionary) {
   int num_dictionary_values = dictionary->values_left();
-  dictionary_.Resize(num_dictionary_values);
-  dictionary->Decode(dictionary_.data(), num_dictionary_values);
+  DecodeDict(dictionary);
+
+  auto dict_values = reinterpret_cast<ByteArray*>(dictionary_->mutable_data());
 
   int total_size = 0;
   for (int i = 0; i < num_dictionary_values; ++i) {
-    total_size += dictionary_[i].len;
+    total_size += dict_values[i].len;
   }
   if (total_size > 0) {
     PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false));
@@ -859,17 +865,18 @@ inline void DictDecoderImpl<ByteArrayType>::SetDict(
   int offset = 0;
   uint8_t* bytes_data = byte_array_data_->mutable_data();
   for (int i = 0; i < num_dictionary_values; ++i) {
-    memcpy(bytes_data + offset, dictionary_[i].ptr, dictionary_[i].len);
-    dictionary_[i].ptr = bytes_data + offset;
-    offset += dictionary_[i].len;
+    memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len);
+    dict_values[i].ptr = bytes_data + offset;
+    offset += dict_values[i].len;
   }
 }
 
 template <>
 inline void DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>* 
dictionary) {
   int num_dictionary_values = dictionary->values_left();
-  dictionary_.Resize(num_dictionary_values);
-  dictionary->Decode(&dictionary_[0], num_dictionary_values);
+  DecodeDict(dictionary);
+
+  auto dict_values = reinterpret_cast<FLBA*>(dictionary_->mutable_data());
 
   int fixed_len = descr_->type_length();
   int total_size = num_dictionary_values * fixed_len;
@@ -877,8 +884,8 @@ inline void 
DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>* dictionar
   PARQUET_THROW_NOT_OK(byte_array_data_->Resize(total_size, false));
   uint8_t* bytes_data = byte_array_data_->mutable_data();
   for (int32_t i = 0, offset = 0; i < num_dictionary_values; ++i, offset += 
fixed_len) {
-    memcpy(bytes_data + offset, dictionary_[i].ptr, fixed_len);
-    dictionary_[i].ptr = bytes_data + offset;
+    memcpy(bytes_data + offset, dict_values[i].ptr, fixed_len);
+    dict_values[i].ptr = bytes_data + offset;
   }
 }
 
@@ -897,6 +904,8 @@ class DictByteArrayDecoder : public 
DictDecoderImpl<ByteArrayType>,
     builder->Reserve(num_values);
     ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, 
num_values);
 
+    auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+
     int values_decoded = 0;
     while (values_decoded < num_values) {
       bool is_valid = bit_reader.IsSet();
@@ -911,7 +920,7 @@ class DictByteArrayDecoder : public 
DictDecoderImpl<ByteArrayType>,
         while (true) {
           // Consume all indices
           if (is_valid) {
-            const auto& val = dictionary_[indices_buffer[i]];
+            const auto& val = dict_values[indices_buffer[i]];
             builder->Append(val.ptr, val.len);
             ++i;
           } else {
@@ -948,12 +957,14 @@ class DictByteArrayDecoder : public 
DictDecoderImpl<ByteArrayType>,
     int values_decoded = 0;
     builder->Reserve(num_values);
 
+    auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+
     while (values_decoded < num_values) {
       int32_t batch_size = std::min<int32_t>(buffer_size, num_values - 
values_decoded);
       int num_indices = idx_decoder_.GetBatch(indices_buffer, batch_size);
       if (num_indices == 0) break;
       for (int i = 0; i < num_indices; ++i) {
-        const auto& val = dictionary_[indices_buffer[i]];
+        const auto& val = dict_values[indices_buffer[i]];
         builder->Append(val.ptr, val.len);
       }
       values_decoded += num_indices;
diff --git a/cpp/src/parquet/util/memory.cc b/cpp/src/parquet/util/memory.cc
index 9289601..9640f88 100644
--- a/cpp/src/parquet/util/memory.cc
+++ b/cpp/src/parquet/util/memory.cc
@@ -65,56 +65,6 @@ std::unique_ptr<Codec> GetCodecFromArrow(Compression::type 
codec) {
   return result;
 }
 
-template <class T>
-Vector<T>::Vector(int64_t size, MemoryPool* pool)
-    : buffer_(AllocateBuffer(pool, size * sizeof(T))), size_(size), 
capacity_(size) {
-  if (size > 0) {
-    data_ = reinterpret_cast<T*>(buffer_->mutable_data());
-  } else {
-    data_ = nullptr;
-  }
-}
-
-template <class T>
-void Vector<T>::Reserve(int64_t new_capacity) {
-  if (new_capacity > capacity_) {
-    PARQUET_THROW_NOT_OK(buffer_->Resize(new_capacity * sizeof(T)));
-    data_ = reinterpret_cast<T*>(buffer_->mutable_data());
-    capacity_ = new_capacity;
-  }
-}
-
-template <class T>
-void Vector<T>::Resize(int64_t new_size) {
-  Reserve(new_size);
-  size_ = new_size;
-}
-
-template <class T>
-void Vector<T>::Assign(int64_t size, const T val) {
-  Resize(size);
-  for (int64_t i = 0; i < size_; i++) {
-    data_[i] = val;
-  }
-}
-
-template <class T>
-void Vector<T>::Swap(Vector<T>& v) {
-  buffer_.swap(v.buffer_);
-  std::swap(size_, v.size_);
-  std::swap(capacity_, v.capacity_);
-  std::swap(data_, v.data_);
-}
-
-template class Vector<int32_t>;
-template class Vector<int64_t>;
-template class Vector<bool>;
-template class Vector<float>;
-template class Vector<double>;
-template class Vector<Int96>;
-template class Vector<ByteArray>;
-template class Vector<FixedLenByteArray>;
-
 // ----------------------------------------------------------------------
 // Arrow IO wrappers
 
diff --git a/cpp/src/parquet/util/memory.h b/cpp/src/parquet/util/memory.h
index 0d23c31..0a8cd71 100644
--- a/cpp/src/parquet/util/memory.h
+++ b/cpp/src/parquet/util/memory.h
@@ -56,28 +56,6 @@ using MutableBuffer = ::arrow::MutableBuffer;
 using ResizableBuffer = ::arrow::ResizableBuffer;
 using ResizableBuffer = ::arrow::ResizableBuffer;
 
-template <class T>
-class PARQUET_EXPORT Vector {
- public:
-  explicit Vector(int64_t size, ::arrow::MemoryPool* pool);
-  void Resize(int64_t new_size);
-  void Reserve(int64_t new_capacity);
-  void Assign(int64_t size, const T val);
-  void Swap(Vector<T>& v);
-  inline T& operator[](int64_t i) const { return data_[i]; }
-
-  T* data() { return data_; }
-  const T* data() const { return data_; }
-
- private:
-  std::shared_ptr<ResizableBuffer> buffer_;
-  int64_t size_;
-  int64_t capacity_;
-  T* data_;
-
-  PARQUET_DISALLOW_COPY_AND_ASSIGN(Vector);
-};
-
 // File input and output interfaces that translate arrow::Status to exceptions
 
 class PARQUET_EXPORT FileInterface {

Reply via email to