This is an automated email from the ASF dual-hosted git repository.
maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 01deb9438a GH-39419: [C++][Parquet] Style: Using arrow::Buffer data_as
api rather than reinterpret_cast (#39420)
01deb9438a is described below
commit 01deb9438acde11f1968acd2a0bb5d3e8e4a4cc6
Author: mwish <[email protected]>
AuthorDate: Fri Jan 5 23:44:52 2024 +0800
GH-39419: [C++][Parquet] Style: Using arrow::Buffer data_as api rather than
reinterpret_cast (#39420)
### Rationale for this change
This patch uses the `{mutable_}data_as<T>()` API to replace
`reinterpret_cast<{const} T*>`. It is purely a style fix.
### What changes are included in this PR?
Just an API replacement for `::arrow::Buffer`:
* `reinterpret_cast<T*>` -> `mutable_data_as<T>()`
* `reinterpret_cast<const T*>` -> `data_as<T>()`
Also, for `auto {variable_name} = reinterpret_cast<{const} T*>( ... )`, I
changed the declaration to use:
1. `const auto*` for `data_as<T>()`.
2. `auto*` for `mutable_data_as<T>()`
This doesn't change the semantics, but makes the code more readable.
### Are these changes tested?
Not needed; this is a style-only change.
### Are there any user-facing changes?
no
* Closes: #39419
*
Authored-by: mwish <[email protected]>
Signed-off-by: mwish <[email protected]>
---
cpp/src/parquet/encoding.cc | 74 ++++++++++++++++++++-------------------------
1 file changed, 33 insertions(+), 41 deletions(-)
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 840efa12cc..b07ad6c9fb 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -125,7 +125,7 @@ class PlainEncoder : public EncoderImpl, virtual public
TypedEncoder<DType> {
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values
* sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->template mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -323,7 +323,7 @@ class PlainEncoder<BooleanType> : public EncoderImpl,
virtual public BooleanEnco
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values
* sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -882,7 +882,7 @@ void ByteStreamSplitEncoder<DType>::PutSpaced(const T* src,
int num_values,
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values *
sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->template mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -1080,7 +1080,7 @@ inline int DecodePlain<FixedLenByteArray>(const uint8_t*
data, int64_t data_size
ParquetException::EofException();
}
for (int i = 0; i < num_values; ++i) {
- out[i].ptr = data + i * type_length;
+ out[i].ptr = data + i * static_cast<int64_t>(type_length);
}
return static_cast<int>(bytes_to_decode);
}
@@ -1537,9 +1537,8 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
int Decode(T* buffer, int num_values) override {
num_values = std::min(num_values, num_values_);
- int decoded_values =
- idx_decoder_.GetBatchWithDict(reinterpret_cast<const
T*>(dictionary_->data()),
- dictionary_length_, buffer, num_values);
+ int decoded_values = idx_decoder_.GetBatchWithDict(
+ dictionary_->data_as<T>(), dictionary_length_, buffer, num_values);
if (decoded_values != num_values) {
ParquetException::EofException();
}
@@ -1551,9 +1550,8 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
int64_t valid_bits_offset) override {
num_values = std::min(num_values, num_values_);
if (num_values != idx_decoder_.GetBatchWithDictSpaced(
- reinterpret_cast<const T*>(dictionary_->data()),
- dictionary_length_, buffer, num_values, null_count,
valid_bits,
- valid_bits_offset)) {
+ dictionary_->data_as<T>(), dictionary_length_,
buffer,
+ num_values, null_count, valid_bits,
valid_bits_offset)) {
ParquetException::EofException();
}
num_values_ -= num_values;
@@ -1580,8 +1578,7 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
num_values, /*shrink_to_fit=*/false));
}
- auto indices_buffer =
- reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
+ auto indices_buffer = indices_scratch_space_->mutable_data_as<int32_t>();
if (num_values != idx_decoder_.GetBatchSpaced(num_values, null_count,
valid_bits,
valid_bits_offset,
indices_buffer)) {
@@ -1611,8 +1608,7 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize<int32_t>(
num_values, /*shrink_to_fit=*/false));
}
- auto indices_buffer =
- reinterpret_cast<int32_t*>(indices_scratch_space_->mutable_data());
+ auto indices_buffer = indices_scratch_space_->mutable_data_as<int32_t>();
if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) {
ParquetException::EofException();
}
@@ -1632,7 +1628,7 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
void GetDictionary(const T** dictionary, int32_t* dictionary_length)
override {
*dictionary_length = dictionary_length_;
- *dictionary = reinterpret_cast<T*>(dictionary_->mutable_data());
+ *dictionary = dictionary_->mutable_data_as<T>();
}
protected:
@@ -1647,8 +1643,7 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T),
/*shrink_to_fit=*/false));
- dictionary->Decode(reinterpret_cast<T*>(dictionary_->mutable_data()),
- dictionary_length_);
+ dictionary->Decode(dictionary_->mutable_data_as<T>(), dictionary_length_);
}
// Only one is set.
@@ -1688,7 +1683,7 @@ template <>
void DictDecoderImpl<ByteArrayType>::SetDict(TypedDecoder<ByteArrayType>*
dictionary) {
DecodeDict(dictionary);
- auto dict_values = reinterpret_cast<ByteArray*>(dictionary_->mutable_data());
+ auto* dict_values = dictionary_->mutable_data_as<ByteArray>();
int total_size = 0;
for (int i = 0; i < dictionary_length_; ++i) {
@@ -1702,8 +1697,7 @@ void
DictDecoderImpl<ByteArrayType>::SetDict(TypedDecoder<ByteArrayType>* dictio
int32_t offset = 0;
uint8_t* bytes_data = byte_array_data_->mutable_data();
- int32_t* bytes_offsets =
- reinterpret_cast<int32_t*>(byte_array_offsets_->mutable_data());
+ int32_t* bytes_offsets = byte_array_offsets_->mutable_data_as<int32_t>();
for (int i = 0; i < dictionary_length_; ++i) {
memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len);
bytes_offsets[i] = offset;
@@ -1717,7 +1711,7 @@ template <>
inline void DictDecoderImpl<FLBAType>::SetDict(TypedDecoder<FLBAType>*
dictionary) {
DecodeDict(dictionary);
- auto dict_values = reinterpret_cast<FLBA*>(dictionary_->mutable_data());
+ auto* dict_values = dictionary_->mutable_data_as<FLBA>();
int fixed_len = descr_->type_length();
int total_size = dictionary_length_ * fixed_len;
@@ -1765,7 +1759,7 @@ int DictDecoderImpl<DType>::DecodeArrow(
typename EncodingTraits<DType>::DictAccumulator* builder) {
PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
- auto dict_values = reinterpret_cast<const typename
DType::c_type*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<typename DType::c_type>();
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
@@ -1801,7 +1795,7 @@ inline int DictDecoderImpl<FLBAType>::DecodeArrow(
PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
- auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<FLBA>();
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
@@ -1834,7 +1828,7 @@ int DictDecoderImpl<FLBAType>::DecodeArrow(
PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
- auto dict_values = reinterpret_cast<const FLBA*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<FLBA>();
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
@@ -1858,7 +1852,7 @@ int DictDecoderImpl<Type>::DecodeArrow(
PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
using value_type = typename Type::c_type;
- auto dict_values = reinterpret_cast<const value_type*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<value_type>();
VisitNullBitmapInline(
valid_bits, valid_bits_offset, num_values, null_count,
@@ -1936,7 +1930,7 @@ class DictByteArrayDecoderImpl : public
DictDecoderImpl<ByteArrayType>,
// space for binary data.
RETURN_NOT_OK(helper.Prepare());
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<ByteArray>();
int values_decoded = 0;
int num_indices = 0;
int pos_indices = 0;
@@ -2007,7 +2001,7 @@ class DictByteArrayDecoderImpl : public
DictDecoderImpl<ByteArrayType>,
// space for binary data.
RETURN_NOT_OK(helper.Prepare());
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<ByteArray>();
while (values_decoded < num_values) {
const int32_t batch_size =
@@ -2037,7 +2031,7 @@ class DictByteArrayDecoderImpl : public
DictDecoderImpl<ByteArrayType>,
RETURN_NOT_OK(builder->Reserve(num_values));
::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset,
num_values);
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<ByteArray>();
int values_decoded = 0;
int num_appended = 0;
@@ -2090,7 +2084,7 @@ class DictByteArrayDecoderImpl : public
DictDecoderImpl<ByteArrayType>,
RETURN_NOT_OK(builder->Reserve(num_values));
- auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
+ const auto* dict_values = dictionary_->data_as<ByteArray>();
int values_decoded = 0;
while (values_decoded < num_values) {
@@ -2388,7 +2382,7 @@ void DeltaBitPackEncoder<DType>::PutSpaced(const T* src,
int num_values,
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values *
sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->template mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -2734,7 +2728,7 @@ void DeltaLengthByteArrayEncoder<DType>::PutSpaced(const
T* src, int num_values,
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values *
sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->template mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -2789,8 +2783,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
}
int32_t data_size = 0;
- const int32_t* length_ptr =
- reinterpret_cast<const int32_t*>(buffered_length_->data()) +
length_idx_;
+ const int32_t* length_ptr = buffered_length_->data_as<int32_t>() +
length_idx_;
int bytes_offset = len_ - decoder_->bytes_left();
for (int i = 0; i < max_values; ++i) {
int32_t len = length_ptr[i];
@@ -2844,8 +2837,8 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
// call len_decoder_.Decode to decode all the lengths.
// all the lengths are buffered in buffered_length_.
- int ret = len_decoder_.Decode(
- reinterpret_cast<int32_t*>(buffered_length_->mutable_data()),
num_length);
+ int ret =
+ len_decoder_.Decode(buffered_length_->mutable_data_as<int32_t>(),
num_length);
DCHECK_EQ(ret, num_length);
length_idx_ = 0;
num_valid_values_ = num_length;
@@ -2938,7 +2931,7 @@ class RleBooleanEncoder final : public EncoderImpl,
virtual public BooleanEncode
if (valid_bits != NULLPTR) {
PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values
* sizeof(T),
this->memory_pool()));
- T* data = reinterpret_cast<T*>(buffer->mutable_data());
+ T* data = buffer->mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -3136,7 +3129,7 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual
public TypedEncoder<DT
} else {
PARQUET_THROW_NOT_OK(buffer_->Resize(num_values * sizeof(T), false));
}
- T* data = reinterpret_cast<T*>(buffer_->mutable_data());
+ T* data = buffer_->mutable_data_as<T>();
int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
src, num_values, valid_bits, valid_bits_offset, data);
Put(data, num_valid_values);
@@ -3338,7 +3331,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl,
virtual public TypedDecode
// all the prefix lengths are buffered in buffered_prefix_length_.
PARQUET_THROW_NOT_OK(buffered_prefix_length_->Resize(num_prefix *
sizeof(int32_t)));
int ret = prefix_len_decoder_.Decode(
- reinterpret_cast<int32_t*>(buffered_prefix_length_->mutable_data()),
num_prefix);
+ buffered_prefix_length_->mutable_data_as<int32_t>(), num_prefix);
DCHECK_EQ(ret, num_prefix);
prefix_len_offset_ = 0;
num_valid_values_ = num_prefix;
@@ -3425,8 +3418,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl,
virtual public TypedDecode
int64_t data_size = 0;
const int32_t* prefix_len_ptr =
- reinterpret_cast<const int32_t*>(buffered_prefix_length_->data()) +
- prefix_len_offset_;
+ buffered_prefix_length_->data_as<int32_t>() + prefix_len_offset_;
for (int i = 0; i < max_values; ++i) {
if (prefix_len_ptr[i] == 0) {
// We don't need to copy the suffix if the prefix length is 0.
@@ -3578,7 +3570,7 @@ class ByteStreamSplitDecoder : public DecoderImpl,
virtual public TypedDecoder<D
if (!decode_buffer_ || decode_buffer_->size() < size) {
PARQUET_ASSIGN_OR_THROW(decode_buffer_, ::arrow::AllocateBuffer(size));
}
- return reinterpret_cast<T*>(decode_buffer_->mutable_data());
+ return decode_buffer_->mutable_data_as<T>();
}
private: