wgtmac commented on code in PR #44387: URL: https://github.com/apache/arrow/pull/44387#discussion_r1797718251
########## cpp/src/arrow/io/buffered.cc: ########## @@ -350,7 +364,7 @@ class BufferedInputStream::Impl : public BufferedBase { } Status DoBuffer() { - // Fill buffer + // Refill the buffer from the raw stream with `buffer_size_` bytes. Review Comment: ```suggestion // Fill the buffer from the raw stream with at most `buffer_size_` bytes. ``` ########## cpp/src/arrow/io/buffered.cc: ########## @@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase { // Resize internal read buffer. Note that the internal buffer-size // should be not larger than the raw_read_bound_. Review Comment: ```suggestion // should not be larger than the raw_read_bound_. ``` ########## cpp/src/arrow/io/buffered.cc: ########## @@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase { // Resize internal read buffer. Note that the internal buffer-size // should be not larger than the raw_read_bound_. + // + // SetBufferSize will not change the buffer_pos_ and bytes_buffered_. Status SetBufferSize(int64_t new_buffer_size) { if (new_buffer_size <= 0) { return Status::Invalid("Buffer size should be positive"); } if ((buffer_pos_ + bytes_buffered_) >= new_buffer_size) { - return Status::Invalid("Cannot shrink read buffer if buffered data remains"); + return Status::Invalid( + "Cannot shrink read buffer if buffered data remains, new_buffer_size:", + new_buffer_size, ", buffer_pos:", buffer_pos_, + ", bytes_buffered:", bytes_buffered_, ", buffer_size:", buffer_size_); } if (raw_read_bound_ >= 0) { // No need to reserve space for more than the total remaining number of bytes. - new_buffer_size = std::min(new_buffer_size, - bytes_buffered_ + (raw_read_bound_ - raw_read_total_)); + if (bytes_buffered_ == 0) { + // Special case: we can not keep the current buffer because it does not + // contains any required data. + new_buffer_size = std::min(new_buffer_size, raw_read_bound_ - raw_read_total_); + } else { + // We should keeping the current buffer because it contains data that + // can be read. + new_buffer_size = + std::min(new_buffer_size, buffer_size_ + (raw_read_bound_ - raw_read_total_)); Review Comment: ```suggestion new_buffer_size = std::min(new_buffer_size, buffer_pos_ + buffer_size_ + (raw_read_bound_ - raw_read_total_)); ``` Isn't `buffer_pos_` required to be kept as well? ########## cpp/src/arrow/io/buffered.cc: ########## @@ -51,6 +51,7 @@ class BufferedBase { return !is_open_; } + // Reset the `buffer_` to a new buffer of size `buffer_size_` Review Comment: ```suggestion // Resize buffer_ to buffer_size_ ``` Let's keep consistent variable names in the comment across this file. ########## cpp/src/arrow/io/buffered.cc: ########## @@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase { // Resize internal read buffer. Note that the internal buffer-size // should be not larger than the raw_read_bound_. + // + // SetBufferSize will not change the buffer_pos_ and bytes_buffered_. Review Comment: ```suggestion // It will not change buffer states including buffer_pos_ and bytes_buffered_. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org