pitrou commented on code in PR #47998:
URL: https://github.com/apache/arrow/pull/47998#discussion_r2477094086
##########
cpp/src/arrow/util/rle_encoding_internal.h:
##########
@@ -506,40 +506,38 @@ class RleBitPackedEncoder {
: bit_width_(bit_width), bit_writer_(buffer, buffer_len) {
ARROW_DCHECK_GE(bit_width_, 0);
ARROW_DCHECK_LE(bit_width_, 64);
- max_run_byte_size_ = MinBufferSize(bit_width);
+ max_run_byte_size_ = static_cast<int>(MinBufferSize(bit_width));
ARROW_DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
Clear();
}
/// Returns the minimum buffer size needed to use the encoder for 'bit_width'
/// This is the maximum length of a single run for 'bit_width'.
/// It is not valid to pass a buffer less than this length.
- static int MinBufferSize(int bit_width) {
+ static int64_t MinBufferSize(int bit_width) {
// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
- int max_literal_run_size = 1 + static_cast<int>(::arrow::bit_util::BytesForBits(
- MAX_VALUES_PER_LITERAL_RUN * bit_width));
+ int64_t max_literal_run_size =
+ 1 + ::arrow::bit_util::BytesForBits(MAX_VALUES_PER_LITERAL_RUN * bit_width);
Review Comment:
Wow, I was not aware that our RLE-bit-packed encoder did not generate
literal runs of more than 512 values at a time. This might pessimize decoding
performance quite a bit...
@AntoinePrv This might be interesting to you.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]