mapleFU commented on code in PR #37641:
URL: https://github.com/apache/arrow/pull/37641#discussion_r1337175228
##########
cpp/src/parquet/encoding.cc:
##########
@@ -3370,20 +3375,23 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl,
virtual public TypedDecode
throw ParquetException("excess expansion in DELTA_BYTE_ARRAY");
}
}
- PARQUET_THROW_NOT_OK(buffered_data_->Resize(data_size));
+ PARQUET_THROW_NOT_OK(buffered_data_->Resize(data_size, false));
string_view prefix{last_value_};
uint8_t* data_ptr = buffered_data_->mutable_data();
for (int i = 0; i < max_values; ++i) {
if (ARROW_PREDICT_FALSE(static_cast<size_t>(prefix_len_ptr[i]) >
prefix.length())) {
throw ParquetException("prefix length too large in DELTA_BYTE_ARRAY");
}
- memcpy(data_ptr, prefix.data(), prefix_len_ptr[i]);
- // buffer[i] currently points to the string suffix
- memcpy(data_ptr + prefix_len_ptr[i], buffer[i].ptr, buffer[i].len);
- buffer[i].ptr = data_ptr;
- buffer[i].len += prefix_len_ptr[i];
- data_ptr += buffer[i].len;
+ // If the prefix length is zero, the prefix can be ignored.
Review Comment:
@pitrou I can open a separate PR for this. I think we can optimize two cases:
1. Prefix length == 0
2. Suffix length == 0
Each of these cases can be optimized to avoid copying and memory
allocation. I can split that out into its own PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]