m-trieu commented on code in PR #31902:
URL: https://github.com/apache/beam/pull/31902#discussion_r1757335502
##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/observers/DirectStreamObserver.java:
##########
@@ -74,69 +76,125 @@ public void onNext(T value) {
while (true) {
try {
synchronized (lock) {
+ // If we awaited previously and timed out, wait for the same phase. Otherwise we're
+ // careful to observe the phase before observing isReady.
+ if (awaitPhase < 0) {
+ awaitPhase = isReadyNotifier.getPhase();
+ // If getPhase() returns a value less than 0, the phaser has been terminated.
+ if (awaitPhase < 0) {
+ return;
+ }
+ }
+
// We only check isReady periodically to effectively allow for increasing the outbound
// buffer periodically. This reduces the overhead of blocking while still restricting
// memory because there is a limited # of streams, and we have a max messages size of 2MB.
if (++messagesSinceReady <= messagesBetweenIsReadyChecks) {
- outboundObserver.onNext(value);
+ tryOnNext(value);
return;
}
- // If we awaited previously and timed out, wait for the same phase. Otherwise we're
- // careful to observe the phase before observing isReady.
- if (awaitPhase < 0) {
- awaitPhase = phaser.getPhase();
- }
+
if (outboundObserver.isReady()) {
messagesSinceReady = 0;
- outboundObserver.onNext(value);
+ tryOnNext(value);
return;
}
}
+
// A callback has been registered to advance the phaser whenever the observer
// transitions to is ready. Since we are waiting for a phase observed before the
// outboundObserver.isReady() returned false, we expect it to advance after the
// channel has become ready. This doesn't always seem to be the case (despite
// documentation stating otherwise) so we poll periodically and enforce an overall
// timeout related to the stream deadline.
- phaser.awaitAdvanceInterruptibly(awaitPhase, waitSeconds,
TimeUnit.SECONDS);
+ int nextPhase =
+ isReadyNotifier.awaitAdvanceInterruptibly(awaitPhase, waitSeconds,
TimeUnit.SECONDS);
+ // If nextPhase is a value less than 0, the phaser has been terminated.
+ if (nextPhase < 0) {
+ return;
+ }
+
synchronized (lock) {
messagesSinceReady = 0;
- outboundObserver.onNext(value);
+ tryOnNext(value);
return;
}
} catch (TimeoutException e) {
+ if (isReadyNotifier.isTerminated()) {
+ return;
+ }
+
totalSecondsWaited += waitSeconds;
- if (totalSecondsWaited > deadlineSeconds) {
- LOG.error(
- "Exceeded timeout waiting for the outboundObserver to become
ready meaning "
- + "that the stream deadline was not respected.");
- throw new RuntimeException(e);
+ if (hasDeadlineExpired(totalSecondsWaited)) {
+ String errorMessage =
constructStreamCancelledErrorMessage(totalSecondsWaited);
+ LOG.error(errorMessage);
+ throw new StreamObserverCancelledException(errorMessage, e);
}
+
if (totalSecondsWaited > 30) {
LOG.info(
"Output channel stalled for {}s, outbound thread {}.",
totalSecondsWaited,
Thread.currentThread().getName());
}
+
waitSeconds = waitSeconds * 2;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- throw new RuntimeException(e);
+ StreamObserverCancelledException ex = new
StreamObserverCancelledException(e);
+ LOG.error("Interrupted while waiting for outboundObserver to become
ready.", ex);
+ throw ex;
+ }
+ }
+ }
+
+ /**
+ * Only send the next value if the phaser is not terminated by the time we acquire the lock since
+ * the phaser can be terminated at any time.
+ */
+ private void tryOnNext(T value) {
Review Comment:
But if the phaser is terminated, we don't want to call
outboundObserver.onNext(), right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]