westonpace commented on a change in pull request #12662:
URL: https://github.com/apache/arrow/pull/12662#discussion_r832734848



##########
File path: cpp/src/arrow/util/async_generator.h
##########
@@ -1058,18 +1109,73 @@ class MergedGenerator {
       return source();
     }
 
+    void SignalErrorUnlocked() {
+      broken = true;
+      // Empty any results that have arrived but not asked for.
+      while (!delivered_jobs.empty()) {
+        delivered_jobs.pop_front();
+      }
+    }
+
+    void Purge() {
+      while (!waiting_jobs.empty()) {
+        waiting_jobs.front()->MarkFinished(IterationEnd<T>());
+        waiting_jobs.pop_front();
+      }
+    }
+
+    void MarkFinished() {
+      all_finished.MarkFinished();
+      Purge();
+    }
+
+    // This is called outside the mutex but it is only ever called
+    // once and Future<>::AddCallback is thread-safe
+    void MarkFinalError(const Status& err, Future<T> maybe_sink) {
+      if (maybe_sink.is_valid()) {
+        // Someone is waiting for this error so lets mark it complete when
+        // all the work is done
+        // all_finished will get called by something with a strong pointer to state
+        // so we can safely capture this
+        all_finished.AddCallback([maybe_sink, err](const Status& status) 
mutable {
+          maybe_sink.MarkFinished(err);
+        });
+      } else {
+        // No one is waiting for this error right now so it will be delivered
+        // next.
+        final_error = err;
+      }
+    }
+
+    bool IsComplete() {
+      return outstanding_requests == 0 &&
+             (broken || (source_exhausted && num_running_subscriptions == 0 &&
+                         delivered_jobs.empty()));
+    }
+
+    bool MarkTaskFinishedUnlocked() {
+      --outstanding_requests;
+      return IsComplete();
+    }
+
     AsyncGenerator<AsyncGenerator<T>> source;
     // active_subscriptions and delivered_jobs will be bounded by max_subscriptions
     std::vector<AsyncGenerator<T>> active_subscriptions;
     std::deque<std::shared_ptr<DeliveredJob>> delivered_jobs;
     // waiting_jobs is unbounded, reentrant pulls (e.g. AddReadahead) will provide the
     // backpressure
     std::deque<std::shared_ptr<Future<T>>> waiting_jobs;
+    // A future that will be marked complete when the terminal item has arrived and all
+    // outstanding futures have completed.  It is used to hold off emission of an error
+    // until all outstanding work is done.
+    Future<> all_finished = Future<>::Make();
     util::Mutex mutex;
     bool first;
+    bool broken;
     bool source_exhausted;

Review comment:
       There are probably a few coarse-grained semantic states:
   `Unstarted` -> `Priming` -> `Running` -> `Winding Down` -> `Completed`.  Any
   of the middle three can branch into `Broken` (which then eventually goes
   to `Completed`).
   
   But I'm not sure how to use this information to make anything cleaner.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to