bkietz commented on code in PR #39995:
URL: https://github.com/apache/arrow/pull/39995#discussion_r1488083820


##########
cpp/src/arrow/dataset/dataset_writer.cc:
##########
@@ -610,31 +610,34 @@ class DatasetWriter::DatasetWriterImpl {
       bool will_open_file = false;
       ARROW_ASSIGN_OR_RAISE(auto next_chunk, dir_queue->NextWritableChunk(
                                                  batch, &remainder, 
&will_open_file));
-
-      backpressure =
-          
writer_state_.rows_in_flight_throttle.Acquire(next_chunk->num_rows());
-      if (!backpressure.is_finished()) {
-        
EVENT_ON_CURRENT_SPAN("DatasetWriter::Backpressure::TooManyRowsQueued");
-        break;
-      }
-      if (will_open_file) {
-        backpressure = writer_state_.open_files_throttle.Acquire(1);
+      // GH-39965: `NextWritableChunk` may return empty batch if file reaches
+      // `max_rows_per_file`.
+      if (next_chunk->num_rows() > 0) {

Review Comment:
   Nit: I'd prefer to see this check as an independent branch rather than 
nesting more deeply
   ```suggestion
         if (next_chunk->num_rows() == 0) {
           // GH-39965: `NextWritableChunk` may return an empty batch to signal
           // that the current file has reached `max_rows_per_file` and should 
be
           // finished.
           batch = std::move(remainder);
           if (batch) {
             RETURN_NOT_OK(dir_queue->FinishCurrentFile());
           }
           continue;
         }
   ```



##########
cpp/src/arrow/dataset/dataset_writer.cc:
##########
@@ -610,31 +610,34 @@ class DatasetWriter::DatasetWriterImpl {
       bool will_open_file = false;
       ARROW_ASSIGN_OR_RAISE(auto next_chunk, dir_queue->NextWritableChunk(
                                                  batch, &remainder, 
&will_open_file));
-
-      backpressure =
-          
writer_state_.rows_in_flight_throttle.Acquire(next_chunk->num_rows());
-      if (!backpressure.is_finished()) {
-        
EVENT_ON_CURRENT_SPAN("DatasetWriter::Backpressure::TooManyRowsQueued");
-        break;
-      }
-      if (will_open_file) {
-        backpressure = writer_state_.open_files_throttle.Acquire(1);
+      // GH-39965: `NextWritableChunk` may return empty batch if file reaches
+      // `max_rows_per_file`.

Review Comment:
   IIUC:
   ```suggestion
         // GH-39965: `NextWritableChunk` may return an empty batch to signal
         // that the current file has reached `max_rows_per_file` and should be
         // finished.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to