wgtmac commented on code in PR #44387:
URL: https://github.com/apache/arrow/pull/44387#discussion_r1797718251


##########
cpp/src/arrow/io/buffered.cc:
##########
@@ -350,7 +364,7 @@ class BufferedInputStream::Impl : public BufferedBase {
   }
 
   Status DoBuffer() {
-    // Fill buffer
+    // Refill the buffer from the raw stream with `buffer_size_` bytes.

Review Comment:
   ```suggestion
       // Fill the buffer from the raw stream with at most `buffer_size_` bytes.
   ```



##########
cpp/src/arrow/io/buffered.cc:
##########
@@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase {
 
   // Resize internal read buffer. Note that the internal buffer-size
   // should be not larger than the raw_read_bound_.

Review Comment:
   ```suggestion
     // should not be larger than the raw_read_bound_.
   ```



##########
cpp/src/arrow/io/buffered.cc:
##########
@@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase {
 
   // Resize internal read buffer. Note that the internal buffer-size
   // should be not larger than the raw_read_bound_.
+  //
+  // SetBufferSize will not change the buffer_pos_ and bytes_buffered_.
   Status SetBufferSize(int64_t new_buffer_size) {
     if (new_buffer_size <= 0) {
       return Status::Invalid("Buffer size should be positive");
     }
     if ((buffer_pos_ + bytes_buffered_) >= new_buffer_size) {
-      return Status::Invalid("Cannot shrink read buffer if buffered data 
remains");
+      return Status::Invalid(
+          "Cannot shrink read buffer if buffered data remains, 
new_buffer_size:",
+          new_buffer_size, ", buffer_pos:", buffer_pos_,
+          ", bytes_buffered:", bytes_buffered_, ", buffer_size:", 
buffer_size_);
     }
     if (raw_read_bound_ >= 0) {
       // No need to reserve space for more than the total remaining number of 
bytes.
-      new_buffer_size = std::min(new_buffer_size,
-                                 bytes_buffered_ + (raw_read_bound_ - 
raw_read_total_));
+      if (bytes_buffered_ == 0) {
+        // Special case: we can not keep the current buffer because it does not
+        // contains any required data.
+        new_buffer_size = std::min(new_buffer_size, raw_read_bound_ - 
raw_read_total_);
+      } else {
+        // We should keeping the current buffer because it contains data that
+        // can be read.
+        new_buffer_size =
+            std::min(new_buffer_size, buffer_size_ + (raw_read_bound_ - 
raw_read_total_));

Review Comment:
   ```suggestion
           new_buffer_size =
               std::min(new_buffer_size, buffer_pos_ + buffer_size_ + 
(raw_read_bound_ - raw_read_total_));
   ```
   
   Isn't `buffer_pos_` required to be kept as well?



##########
cpp/src/arrow/io/buffered.cc:
##########
@@ -51,6 +51,7 @@ class BufferedBase {
     return !is_open_;
   }
 
+  // Reset the `buffer_` to a new buffer of size `buffer_size_`

Review Comment:
   ```suggestion
     // Resize buffer_ to buffer_size_
   ```
   
   Let's keep consistent variable names in the comment across this file.



##########
cpp/src/arrow/io/buffered.cc:
##########
@@ -284,17 +285,30 @@ class BufferedInputStream::Impl : public BufferedBase {
 
   // Resize internal read buffer. Note that the internal buffer-size
   // should be not larger than the raw_read_bound_.
+  //
+  // SetBufferSize will not change the buffer_pos_ and bytes_buffered_.

Review Comment:
   ```suggestion
     // It will not change buffer states including buffer_pos_ and 
bytes_buffered_.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to