scwhittle commented on code in PR #32774:
URL: https://github.com/apache/beam/pull/32774#discussion_r1843484534


##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java:
##########
@@ -269,31 +312,43 @@ public final boolean awaitTermination(int time, TimeUnit 
unit) throws Interrupte
 
   @Override
   public final Instant startTime() {
-    return new Instant(startTimeMs.get());
+    return new Instant(debugMetrics.getStartTimeMs());
   }
 
   @Override
   public String backendWorkerToken() {
     return backendWorkerToken;
   }
 
+  @SuppressWarnings("GuardedBy")
   @Override
-  public void shutdown() {
-    if (isShutdown.compareAndSet(false, true)) {
-      requestObserver()
-          .onError(new WindmillStreamShutdownException("Explicit call to 
shutdown stream."));
+  public final void shutdown() {
+    // Don't lock on "this" before poisoning the request observer since 
otherwise the observer may
+    // be blocking in send().
+    requestObserver.poison();
+    synchronized (this) {
+      if (!isShutdown) {
+        isShutdown = true;
+        debugMetrics.recordShutdown();
+        shutdownInternal();
+      }
     }
   }
 
-  private void setLastError(String error) {
-    lastError.set(error);
-    lastErrorTime.set(DateTime.now());
-  }
-
-  public static class WindmillStreamShutdownException extends RuntimeException 
{
-    public WindmillStreamShutdownException(String message) {
-      super(message);
+  protected abstract void shutdownInternal();
+
+  /** Returns true if the stream was torn down and should not be restarted 
internally. */
+  private synchronized boolean maybeTearDownStream() {
+    if (requestObserver.hasReceivedPoisonPill()

Review Comment:
   What is this hasReceivedPoisonPill() check guarding against?
   It is racy because we poison outside of the synchronized block, so we could have:
   
   T1: notices an unrelated stream failure, passes this check because the observer isn't poisoned yet, and starts calling onNewStream.
   T2: calls shutdown(), which poisons the request observer.
   T1: calls requestObserver.reset() and gets an exception due to the poison.
   
   Instead of checking for the poison here, it seems like we should just handle the exception from reset() failing, as that covers both cases.



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/client/AbstractWindmillStream.java:
##########
@@ -269,31 +312,43 @@ public final boolean awaitTermination(int time, TimeUnit 
unit) throws Interrupte
 
   @Override
   public final Instant startTime() {
-    return new Instant(startTimeMs.get());
+    return new Instant(debugMetrics.getStartTimeMs());
   }
 
   @Override
   public String backendWorkerToken() {
     return backendWorkerToken;
   }
 
+  @SuppressWarnings("GuardedBy")

Review Comment:
   Remove the @SuppressWarnings("GuardedBy") suppression.



##########
runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/client/grpc/GrpcCommitWorkStreamTest.java:
##########
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.windmill.client.grpc;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.argThat;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.spy;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import javax.annotation.Nullable;
+import 
org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc;
+import org.apache.beam.runners.dataflow.worker.windmill.Windmill;
+import org.apache.beam.runners.dataflow.worker.windmill.client.WindmillStream;
+import 
org.apache.beam.runners.dataflow.worker.windmill.client.grpc.observers.StreamObserverCancelledException;
+import 
org.apache.beam.runners.dataflow.worker.windmill.client.throttling.ThrottleTimer;
+import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.ByteString;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.ManagedChannel;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.Server;
+import 
org.apache.beam.vendor.grpc.v1p60p1.io.grpc.inprocess.InProcessChannelBuilder;
+import 
org.apache.beam.vendor.grpc.v1p60p1.io.grpc.inprocess.InProcessServerBuilder;
+import 
org.apache.beam.vendor.grpc.v1p60p1.io.grpc.stub.ServerCallStreamObserver;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.stub.StreamObserver;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.testing.GrpcCleanupRule;
+import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.util.MutableHandlerRegistry;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.InOrder;
+
+@RunWith(JUnit4.class)
+public class GrpcCommitWorkStreamTest {
+  private static final String FAKE_SERVER_NAME = "Fake server for 
GrpcCommitWorkStreamTest";
+  private static final Windmill.JobHeader TEST_JOB_HEADER =
+      Windmill.JobHeader.newBuilder()
+          .setJobId("test_job")
+          .setWorkerId("test_worker")
+          .setProjectId("test_project")
+          .build();
+  private static final String COMPUTATION_ID = "computationId";
+
+  @Rule public final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule();
+  private final MutableHandlerRegistry serviceRegistry = new 
MutableHandlerRegistry();
+  @Rule public transient Timeout globalTimeout = Timeout.seconds(600);
+  private ManagedChannel inProcessChannel;
+
+  private static Windmill.WorkItemCommitRequest workItemCommitRequest(long 
value) {
+    return Windmill.WorkItemCommitRequest.newBuilder()
+        .setKey(ByteString.EMPTY)
+        .setShardingKey(value)
+        .setWorkToken(value)
+        .setCacheToken(value)
+        .build();
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    Server server =
+        InProcessServerBuilder.forName(FAKE_SERVER_NAME)
+            .fallbackHandlerRegistry(serviceRegistry)
+            .directExecutor()
+            .build()
+            .start();
+
+    inProcessChannel =
+        grpcCleanup.register(
+            
InProcessChannelBuilder.forName(FAKE_SERVER_NAME).directExecutor().build());
+    grpcCleanup.register(server);
+    grpcCleanup.register(inProcessChannel);
+  }
+
+  @After
+  public void cleanUp() {
+    inProcessChannel.shutdownNow();
+  }
+
+  private GrpcCommitWorkStream createCommitWorkStream(CommitWorkStreamTestStub 
testStub) {
+    serviceRegistry.addService(testStub);
+    GrpcCommitWorkStream commitWorkStream =
+        (GrpcCommitWorkStream)
+            GrpcWindmillStreamFactory.of(TEST_JOB_HEADER)
+                .build()
+                .createCommitWorkStream(
+                    CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel),
+                    new ThrottleTimer());
+    commitWorkStream.start();
+    return commitWorkStream;
+  }
+
+  @Test
+  public void testShutdown_abortsQueuedCommits() throws InterruptedException {
+    int numCommits = 5;
+    CountDownLatch commitProcessed = new CountDownLatch(numCommits);
+    Set<Windmill.CommitStatus> onDone = new HashSet<>();
+
+    TestCommitWorkStreamRequestObserver requestObserver =
+        spy(new TestCommitWorkStreamRequestObserver());
+    CommitWorkStreamTestStub testStub = new 
CommitWorkStreamTestStub(requestObserver);
+    GrpcCommitWorkStream commitWorkStream = createCommitWorkStream(testStub);
+    InOrder requestObserverVerifier = inOrder(requestObserver);
+    try (WindmillStream.CommitWorkStream.RequestBatcher batcher = 
commitWorkStream.batcher()) {
+      for (int i = 0; i < numCommits; i++) {
+        batcher.commitWorkItem(
+            COMPUTATION_ID,
+            workItemCommitRequest(i),
+            commitStatus -> {
+              onDone.add(commitStatus);
+              commitProcessed.countDown();
+            });
+      }
+    } catch (StreamObserverCancelledException ignored) {
+    }
+
+    // Verify that we sent the commits above in a request + the initial header.
+    requestObserverVerifier
+        .verify(requestObserver)
+        .onNext(argThat(request -> 
request.getHeader().equals(TEST_JOB_HEADER)));
+    requestObserverVerifier
+        .verify(requestObserver)
+        .onNext(argThat(request -> !request.getCommitChunkList().isEmpty()));
+    requestObserverVerifier.verifyNoMoreInteractions();
+
+    // We won't get responses so we will have some pending requests.
+    assertTrue(commitWorkStream.hasPendingRequests());
+    commitWorkStream.shutdown();
+    commitProcessed.await();
+
+    assertThat(onDone).containsExactly(Windmill.CommitStatus.ABORTED);
+  }
+
+  @Test
+  public void testCommitWorkItem_afterShutdown() {
+    int numCommits = 5;
+
+    CommitWorkStreamTestStub testStub =
+        new CommitWorkStreamTestStub(new 
TestCommitWorkStreamRequestObserver());
+    GrpcCommitWorkStream commitWorkStream = createCommitWorkStream(testStub);
+
+    try (WindmillStream.CommitWorkStream.RequestBatcher batcher = 
commitWorkStream.batcher()) {
+      for (int i = 0; i < numCommits; i++) {
+        assertTrue(batcher.commitWorkItem(COMPUTATION_ID, 
workItemCommitRequest(i), ignored -> {}));
+      }
+    }
+    commitWorkStream.shutdown();
+
+    Set<Windmill.CommitStatus> commitStatuses = new HashSet<>();

Review Comment:
   ping on unresolved comment here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to