scwhittle commented on code in PR #34053:
URL: https://github.com/apache/beam/pull/34053#discussion_r2016214989


##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/harness/BackendWorkerMetadataVendor.java:
##########
@@ -75,69 +81,77 @@ static BackendWorkerMetadataVendor create(
    * Returns a {@link CompletableFuture} that can be used to block until the 
initial {@link
    * GetWorkerMetadataStream} has been started.
    */
-  CompletableFuture<Void> start(Consumer<WindmillEndpoints> endpointsConsumer) 
{
-    if (isRunning.compareAndSet(false, true)) {
-      LOG.debug("Starting WorkerMetadataVendor...");
-
-      CountDownLatch initialStreamStarted = new CountDownLatch(1);
-
-      workerMetadataFetcher.execute(
-          () -> {
-            boolean isInitialWorkerMetadata = true;
-            WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
-
-            while (isRunning.get()) {
-              GetWorkerMetadataStream getWorkerMetadataStream =
-                  getWorkerMetadataStreamFactory.create(initialWorkerMetadata, 
endpointsConsumer);
-              LOG.debug(
-                  "Starting GetWorkerMetadataStream w/ metadata version {}.",
-                  initialWorkerMetadata.getMetadataVersion());
-              getWorkerMetadataStream.start();
-
-              if (isInitialWorkerMetadata) {
-                isInitialWorkerMetadata = false;
-                initialStreamStarted.countDown();
+  synchronized void start(Consumer<WindmillEndpoints> endpointsConsumer) {
+    checkState(!isStarted, "Multiple calls to 
BackendWorkerMetadataVendor.start() is not allowed.");
+    checkState(!isShutdown, "BackendWorkerMetadataVendor has previously been 
shutdown.");
+    LOG.debug("Starting WorkerMetadataVendor...");
+
+    fetchBackendWorkerMetadataExecutor.execute(
+        () -> {
+          WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
+
+          while (isStarted && !isShutdown) {

Review Comment:
   Remove the isStarted check; it is racy. isStarted is set only after this
   task is queued to execute, so the task could run first, see isStarted as
   false, skip the loop, and never observe any metadata.
   
   You could also remove the isShutdown check, since the interruption should
   already catch that case.



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/harness/BackendWorkerMetadataVendor.java:
##########
@@ -75,69 +81,77 @@ static BackendWorkerMetadataVendor create(
    * Returns a {@link CompletableFuture} that can be used to block until the 
initial {@link
    * GetWorkerMetadataStream} has been started.
    */
-  CompletableFuture<Void> start(Consumer<WindmillEndpoints> endpointsConsumer) 
{
-    if (isRunning.compareAndSet(false, true)) {
-      LOG.debug("Starting WorkerMetadataVendor...");
-
-      CountDownLatch initialStreamStarted = new CountDownLatch(1);
-
-      workerMetadataFetcher.execute(
-          () -> {
-            boolean isInitialWorkerMetadata = true;
-            WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
-
-            while (isRunning.get()) {
-              GetWorkerMetadataStream getWorkerMetadataStream =
-                  getWorkerMetadataStreamFactory.create(initialWorkerMetadata, 
endpointsConsumer);
-              LOG.debug(
-                  "Starting GetWorkerMetadataStream w/ metadata version {}.",
-                  initialWorkerMetadata.getMetadataVersion());
-              getWorkerMetadataStream.start();
-
-              if (isInitialWorkerMetadata) {
-                isInitialWorkerMetadata = false;
-                initialStreamStarted.countDown();
+  synchronized void start(Consumer<WindmillEndpoints> endpointsConsumer) {
+    checkState(!isStarted, "Multiple calls to 
BackendWorkerMetadataVendor.start() is not allowed.");
+    checkState(!isShutdown, "BackendWorkerMetadataVendor has previously been 
shutdown.");
+    LOG.debug("Starting WorkerMetadataVendor...");
+
+    fetchBackendWorkerMetadataExecutor.execute(
+        () -> {
+          WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
+
+          while (isStarted && !isShutdown) {
+            GetWorkerMetadataStream getWorkerMetadataStream =
+                getWorkerMetadataStreamFactory.create(initialWorkerMetadata, 
endpointsConsumer);
+            LOG.debug(
+                "Starting GetWorkerMetadataStream w/ metadata version {}.",
+                initialWorkerMetadata.getMetadataVersion());
+            getWorkerMetadataStream.start();
+            initialStreamStarted.countDown();
+
+            // Await stream termination and propagate the most current worker 
metadata to start
+            // the next stream.
+            try {
+              initialWorkerMetadata = 
awaitGracefulTermination(getWorkerMetadataStream);
+            } catch (InterruptedException e) {
+              if (isShutdown) {
+                break;
               }
 
-              // Await stream termination and propagate the most current 
worker metadata to start
-              // the next stream.
-              initialWorkerMetadata = 
awaitGracefulTermination(getWorkerMetadataStream);
-              LOG.debug(
-                  "Current GetWorkerMetadataStream terminated. Propagating 
metadata version {} to the next stream.",
-                  initialWorkerMetadata.getMetadataVersion());
+              throw new IllegalStateException(
+                  "BackendWorkerMetadataVendor interrupted unexpectedly.", e);
             }
-          });
 
-      return CompletableFuture.runAsync(
-          () -> {
-            int secondsWaited = 0;
-            try {
-              while (!initialStreamStarted.await(10, TimeUnit.SECONDS) && 
isRunning.get()) {
-                secondsWaited += 10;
-                LOG.debug(
-                    "Waited {} seconds for initial GetWorkerMetadataStream to 
start.",
-                    secondsWaited);
-              }
-            } catch (InterruptedException e) {
-              LOG.debug("Interrupted waiting for initial 
GetWorkerMetadataStream to start.");
-              Thread.currentThread().interrupt();
-            }
-          },
-          // Run this on the calling thread.
-          MoreExecutors.directExecutor());
-    }
+            LOG.debug(
+                "Current GetWorkerMetadataStream terminated. Propagating 
metadata version {} to the next stream.",
+                initialWorkerMetadata.getMetadataVersion());
+          }
+
+          LOG.info("BackendWorkerMetadata vending complete.");
+        });
 
-    return CompletableFuture.completedFuture(null);
+    isStarted = true;
   }
 
-  void stop() {
-    if (isRunning.compareAndSet(true, false)) {
+  /**
+   * Wait for the initial backend worker metadata stream to start.
+   *
+   * @implNote Blocks the calling thread until the stream starts or {@link 
#shutdown()} is called.
+   */
+  void awaitInitialBackendWorkerMetadataStream() {
+    boolean isInitialStreamStarted = false;
+    int waitedSeconds = 0;
+    while (!isInitialStreamStarted && !isShutdown) {
+      try {
+        isInitialStreamStarted = initialStreamStarted.await(10, 
TimeUnit.SECONDS);

Review Comment:
   Could you just break here if the await returns true?
   The isInitialStreamStarted variable can then be removed.



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/streaming/harness/BackendWorkerMetadataVendor.java:
##########
@@ -75,69 +81,77 @@ static BackendWorkerMetadataVendor create(
    * Returns a {@link CompletableFuture} that can be used to block until the 
initial {@link
    * GetWorkerMetadataStream} has been started.
    */
-  CompletableFuture<Void> start(Consumer<WindmillEndpoints> endpointsConsumer) 
{
-    if (isRunning.compareAndSet(false, true)) {
-      LOG.debug("Starting WorkerMetadataVendor...");
-
-      CountDownLatch initialStreamStarted = new CountDownLatch(1);
-
-      workerMetadataFetcher.execute(
-          () -> {
-            boolean isInitialWorkerMetadata = true;
-            WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
-
-            while (isRunning.get()) {
-              GetWorkerMetadataStream getWorkerMetadataStream =
-                  getWorkerMetadataStreamFactory.create(initialWorkerMetadata, 
endpointsConsumer);
-              LOG.debug(
-                  "Starting GetWorkerMetadataStream w/ metadata version {}.",
-                  initialWorkerMetadata.getMetadataVersion());
-              getWorkerMetadataStream.start();
-
-              if (isInitialWorkerMetadata) {
-                isInitialWorkerMetadata = false;
-                initialStreamStarted.countDown();
+  synchronized void start(Consumer<WindmillEndpoints> endpointsConsumer) {
+    checkState(!isStarted, "Multiple calls to 
BackendWorkerMetadataVendor.start() is not allowed.");
+    checkState(!isShutdown, "BackendWorkerMetadataVendor has previously been 
shutdown.");
+    LOG.debug("Starting WorkerMetadataVendor...");
+
+    fetchBackendWorkerMetadataExecutor.execute(
+        () -> {
+          WorkerMetadataResponse initialWorkerMetadata = NO_WORKER_METADATA;
+
+          while (isStarted && !isShutdown) {
+            GetWorkerMetadataStream getWorkerMetadataStream =
+                getWorkerMetadataStreamFactory.create(initialWorkerMetadata, 
endpointsConsumer);
+            LOG.debug(
+                "Starting GetWorkerMetadataStream w/ metadata version {}.",
+                initialWorkerMetadata.getMetadataVersion());
+            getWorkerMetadataStream.start();
+            initialStreamStarted.countDown();
+
+            // Await stream termination and propagate the most current worker 
metadata to start
+            // the next stream.
+            try {
+              initialWorkerMetadata = 
awaitGracefulTermination(getWorkerMetadataStream);
+            } catch (InterruptedException e) {
+              if (isShutdown) {
+                break;
               }
 
-              // Await stream termination and propagate the most current 
worker metadata to start
-              // the next stream.
-              initialWorkerMetadata = 
awaitGracefulTermination(getWorkerMetadataStream);
-              LOG.debug(
-                  "Current GetWorkerMetadataStream terminated. Propagating 
metadata version {} to the next stream.",
-                  initialWorkerMetadata.getMetadataVersion());
+              throw new IllegalStateException(
+                  "BackendWorkerMetadataVendor interrupted unexpectedly.", e);
             }
-          });
 
-      return CompletableFuture.runAsync(
-          () -> {
-            int secondsWaited = 0;
-            try {
-              while (!initialStreamStarted.await(10, TimeUnit.SECONDS) && 
isRunning.get()) {
-                secondsWaited += 10;
-                LOG.debug(
-                    "Waited {} seconds for initial GetWorkerMetadataStream to 
start.",
-                    secondsWaited);
-              }
-            } catch (InterruptedException e) {
-              LOG.debug("Interrupted waiting for initial 
GetWorkerMetadataStream to start.");
-              Thread.currentThread().interrupt();
-            }
-          },
-          // Run this on the calling thread.
-          MoreExecutors.directExecutor());
-    }
+            LOG.debug(
+                "Current GetWorkerMetadataStream terminated. Propagating 
metadata version {} to the next stream.",
+                initialWorkerMetadata.getMetadataVersion());
+          }
+
+          LOG.info("BackendWorkerMetadata vending complete.");
+        });
 
-    return CompletableFuture.completedFuture(null);
+    isStarted = true;
   }
 
-  void stop() {
-    if (isRunning.compareAndSet(true, false)) {
+  /**
+   * Wait for the initial backend worker metadata stream to start.
+   *
+   * @implNote Blocks the calling thread until the stream starts or {@link 
#shutdown()} is called.
+   */
+  void awaitInitialBackendWorkerMetadataStream() {
+    boolean isInitialStreamStarted = false;
+    int waitedSeconds = 0;
+    while (!isInitialStreamStarted && !isShutdown) {
+      try {
+        isInitialStreamStarted = initialStreamStarted.await(10, 
TimeUnit.SECONDS);
+        waitedSeconds += 10;
+        LOG.debug("Waited {}s for initial worker metadata stream to start.", 
waitedSeconds);
+      } catch (InterruptedException e) {
+        LOG.warn(
+            "Interrupted waiting for initial worker metadata stream. Retrying 
until shutdown() is called.");
+      }
+    }
+  }
+
+  synchronized void shutdown() {
+    if (!isShutdown) {
       LOG.debug("Shutting down WorkerMetadataVendor...");
-      workerMetadataFetcher.shutdownNow();
+      isShutdown = true;
+      fetchBackendWorkerMetadataExecutor.shutdownNow();
       boolean isShutdown = false;
       try {
         isShutdown =
-            workerMetadataFetcher.awaitTermination(
+            fetchBackendWorkerMetadataExecutor.awaitTermination(

Review Comment:
   It seems like you could move the log here and get rid of the local
   isShutdown variable, which is confusing because it shadows the member
   variable of the same name.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to