scwhittle commented on code in PR #32774:
URL: https://github.com/apache/beam/pull/32774#discussion_r1843484534
##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java:
##########
@@ -269,31 +312,43 @@ public final boolean awaitTermination(int time, TimeUnit unit) throws Interrupte
@Override
public final Instant startTime() {
- return new Instant(startTimeMs.get());
+ return new Instant(debugMetrics.getStartTimeMs());
}
@Override
public String backendWorkerToken() {
return backendWorkerToken;
}
+ @SuppressWarnings("GuardedBy")
@Override
- public void shutdown() {
- if (isShutdown.compareAndSet(false, true)) {
- requestObserver()
- .onError(new WindmillStreamShutdownException("Explicit call to shutdown stream."));
+ public final void shutdown() {
+ // Don't lock on "this" before poisoning the request observer since otherwise the observer may
+ // be blocking in send().
+ requestObserver.poison();
+ synchronized (this) {
+ if (!isShutdown) {
+ isShutdown = true;
+ debugMetrics.recordShutdown();
+ shutdownInternal();
+ }
}
}
- private void setLastError(String error) {
- lastError.set(error);
- lastErrorTime.set(DateTime.now());
- }
-
- public static class WindmillStreamShutdownException extends RuntimeException {
- public WindmillStreamShutdownException(String message) {
- super(message);
+ protected abstract void shutdownInternal();
+
+ /** Returns true if the stream was torn down and should not be restarted internally. */
+ private synchronized boolean maybeTearDownStream() {
+ if (requestObserver.hasReceivedPoisonPill()
Review Comment:
What is this hasReceivedPoisonPill check guarding against?
It is racy because we poison outside of the synchronized block, so we could have:
T1: notices an unrelated stream failure, passes this check (the observer isn't poisoned yet), and starts calling onNewStream
T2: calls shutdown and poisons the request observer
T1: calls requestObserver.reset() and gets an exception due to the poison
Instead of checking for the poison here, it seems like we should just handle the exception from reset() failing, since that covers both cases.
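Roughly the shape I have in mind (a sketch only — the name tryRestartStream is made up here, and the exact exception type a poisoned reset() throws may differ in this PR):

```java
// Sketch, not the PR's code: restart path that relies on reset() failing once the observer has
// been poisoned, instead of pre-checking hasReceivedPoisonPill(). Assumes the surrounding
// AbstractWindmillStream members (requestObserver, isShutdown, onNewStream); the caught
// exception type is a placeholder for whatever a poisoned reset() actually throws.
private synchronized boolean tryRestartStream() {
  if (isShutdown) {
    return false; // Shutdown was already observed under the lock; don't restart.
  }
  try {
    requestObserver.reset(); // Fails if shutdown() poisoned the observer concurrently.
  } catch (WindmillStreamShutdownException e) {
    // Handles both the plain shutdown case and the race where shutdown() poisons the observer
    // after the unrelated stream failure was noticed but before the restart.
    return false;
  }
  onNewStream();
  return true;
}
```

Callers that get false back would then tear the stream down rather than retrying, which removes the need for the separate poison-pill check.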
##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java:
##########
@@ -269,31 +312,43 @@ public final boolean awaitTermination(int time, TimeUnit unit) throws Interrupte
@Override
public final Instant startTime() {
- return new Instant(startTimeMs.get());
+ return new Instant(debugMetrics.getStartTimeMs());
}
@Override
public String backendWorkerToken() {
return backendWorkerToken;
}
+ @SuppressWarnings("GuardedBy")
Review Comment:
remove suppression
##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStreamTest.java:
##########
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.windmill.client.grpc;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc;
+import org.apache.beam.runners.dataflow.worker.windmill.Windmill;
+import org.apache.beam.runners.dataflow.worker.windmill.client.WindmillStream;
+import org.apache.beam.runners.dataflow.worker.windmill.client.grpc.observers.StreamObserverCancelledException;
+import org.apache.beam.runners.dataflow.worker.windmill.client.throttling.ThrottleTimer;
+import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.ByteString;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.ManagedChannel;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.Server;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.inprocess.InProcessChannelBuilder;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.inprocess.InProcessServerBuilder;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.stub.ServerCallStreamObserver;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.stub.StreamObserver;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.testing.GrpcCleanupRule;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.util.MutableHandlerRegistry;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.InOrder;
+
+@RunWith(JUnit4.class)
+public class GrpcCommitWorkStreamTest {
+ private static final String FAKE_SERVER_NAME = "Fake server for GrpcCommitWorkStreamTest";
+ private static final Windmill.JobHeader TEST_JOB_HEADER =
+ Windmill.JobHeader.newBuilder()
+ .setJobId("test_job")
+ .setWorkerId("test_worker")
+ .setProjectId("test_project")
+ .build();
+ private static final String COMPUTATION_ID = "computationId";
+
+ @Rule public final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule();
+ private final MutableHandlerRegistry serviceRegistry = new MutableHandlerRegistry();
+ @Rule public transient Timeout globalTimeout = Timeout.seconds(600);
+ private ManagedChannel inProcessChannel;
+
+ private static Windmill.WorkItemCommitRequest workItemCommitRequest(long value) {
+ return Windmill.WorkItemCommitRequest.newBuilder()
+ .setKey(ByteString.EMPTY)
+ .setShardingKey(value)
+ .setWorkToken(value)
+ .setCacheToken(value)
+ .build();
+ }
+
+ @Before
+ public void setUp() throws IOException {
+ Server server =
+ InProcessServerBuilder.forName(FAKE_SERVER_NAME)
+ .fallbackHandlerRegistry(serviceRegistry)
+ .directExecutor()
+ .build()
+ .start();
+
+ inProcessChannel =
+ grpcCleanup.register(
+ InProcessChannelBuilder.forName(FAKE_SERVER_NAME).directExecutor().build());
+ grpcCleanup.register(server);
+ grpcCleanup.register(inProcessChannel);
+ }
+
+ @After
+ public void cleanUp() {
+ inProcessChannel.shutdownNow();
+ }
+
+ private GrpcCommitWorkStream createCommitWorkStream(CommitWorkStreamTestStub testStub) {
+ serviceRegistry.addService(testStub);
+ GrpcCommitWorkStream commitWorkStream =
+ (GrpcCommitWorkStream)
+ GrpcWindmillStreamFactory.of(TEST_JOB_HEADER)
+ .build()
+ .createCommitWorkStream(
+ CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel),
+ new ThrottleTimer());
+ commitWorkStream.start();
+ return commitWorkStream;
+ }
+
+ @Test
+ public void testShutdown_abortsQueuedCommits() throws InterruptedException {
+ int numCommits = 5;
+ CountDownLatch commitProcessed = new CountDownLatch(numCommits);
+ Set<Windmill.CommitStatus> onDone = new HashSet<>();
+
+ TestCommitWorkStreamRequestObserver requestObserver =
+ spy(new TestCommitWorkStreamRequestObserver());
+ CommitWorkStreamTestStub testStub = new CommitWorkStreamTestStub(requestObserver);
+ GrpcCommitWorkStream commitWorkStream = createCommitWorkStream(testStub);
+ InOrder requestObserverVerifier = inOrder(requestObserver);
+ try (WindmillStream.CommitWorkStream.RequestBatcher batcher = commitWorkStream.batcher()) {
+ for (int i = 0; i < numCommits; i++) {
+ batcher.commitWorkItem(
+ COMPUTATION_ID,
+ workItemCommitRequest(i),
+ commitStatus -> {
+ onDone.add(commitStatus);
+ commitProcessed.countDown();
+ });
+ }
+ } catch (StreamObserverCancelledException ignored) {
+ }
+
+ // Verify that we sent the commits above in a request + the initial header.
+ requestObserverVerifier
+ .verify(requestObserver)
+ .onNext(argThat(request -> request.getHeader().equals(TEST_JOB_HEADER)));
+ requestObserverVerifier
+ .verify(requestObserver)
+ .onNext(argThat(request -> !request.getCommitChunkList().isEmpty()));
+ requestObserverVerifier.verifyNoMoreInteractions();
+
+ // We won't get responses so we will have some pending requests.
+ assertTrue(commitWorkStream.hasPendingRequests());
+ commitWorkStream.shutdown();
+ commitProcessed.await();
+
+ assertThat(onDone).containsExactly(Windmill.CommitStatus.ABORTED);
+ }
+
+ @Test
+ public void testCommitWorkItem_afterShutdown() {
+ int numCommits = 5;
+
+ CommitWorkStreamTestStub testStub =
+ new CommitWorkStreamTestStub(new TestCommitWorkStreamRequestObserver());
+ GrpcCommitWorkStream commitWorkStream = createCommitWorkStream(testStub);
+
+ try (WindmillStream.CommitWorkStream.RequestBatcher batcher = commitWorkStream.batcher()) {
+ for (int i = 0; i < numCommits; i++) {
+ assertTrue(batcher.commitWorkItem(COMPUTATION_ID, workItemCommitRequest(i), ignored -> {}));
+ }
+ }
+ commitWorkStream.shutdown();
+
+ Set<Windmill.CommitStatus> commitStatuses = new HashSet<>();
Review Comment:
ping on unresolved comment here