[ 
https://issues.apache.org/jira/browse/BEAM-5041?focusedWorklogId=132528&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-132528
 ]

ASF GitHub Bot logged work on BEAM-5041:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 08/Aug/18 18:46
            Start Date: 08/Aug/18 18:46
    Worklog Time Spent: 10m 
      Work Description: lukecwik closed pull request #6093: [BEAM-5041] Java Fn 
SDK Harness use pTransform to track processed graph
URL: https://github.com/apache/beam/pull/6093
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
index c76357cdf7a..2336aa8ffb4 100644
--- 
a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
+++ 
b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
@@ -29,6 +29,7 @@
 import com.google.common.collect.Sets;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.ServiceLoader;
@@ -145,6 +146,7 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
       ProcessBundleDescriptor processBundleDescriptor,
       SetMultimap<String, String> pCollectionIdsToConsumingPTransforms,
       ListMultimap<String, FnDataReceiver<WindowedValue<?>>> 
pCollectionIdsToConsumers,
+      Set<String> processedPTransformIds,
       Consumer<ThrowingRunnable> addStartFunction,
       Consumer<ThrowingRunnable> addFinishFunction,
       BundleSplitListener splitListener)
@@ -154,10 +156,6 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
     // Since we are creating the consumers first, we know that the we are 
building the DAG
     // in reverse topological order.
     for (String pCollectionId : pTransform.getOutputsMap().values()) {
-      // If we have created the consumers for this PCollection we can skip it.
-      if (pCollectionIdsToConsumers.containsKey(pCollectionId)) {
-        continue;
-      }
 
       for (String consumingPTransformId : 
pCollectionIdsToConsumingPTransforms.get(pCollectionId)) {
         createRunnerAndConsumersForPTransformRecursively(
@@ -168,6 +166,7 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
             processBundleDescriptor,
             pCollectionIdsToConsumingPTransforms,
             pCollectionIdsToConsumers,
+            processedPTransformIds,
             addStartFunction,
             addFinishFunction,
             splitListener);
@@ -185,23 +184,26 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
           String.format(
               "Cannot process composite transform: %s", 
TextFormat.printToString(pTransform)));
     }
-
-    urnToPTransformRunnerFactoryMap
-        .getOrDefault(pTransform.getSpec().getUrn(), 
defaultPTransformRunnerFactory)
-        .createRunnerForPTransform(
-            options,
-            beamFnDataClient,
-            beamFnStateClient,
-            pTransformId,
-            pTransform,
-            processBundleInstructionId,
-            processBundleDescriptor.getPcollectionsMap(),
-            processBundleDescriptor.getCodersMap(),
-            processBundleDescriptor.getWindowingStrategiesMap(),
-            pCollectionIdsToConsumers,
-            addStartFunction,
-            addFinishFunction,
-            splitListener);
+    // Skip reprocessing processed pTransforms.
+    if (!processedPTransformIds.contains(pTransformId)) {
+      urnToPTransformRunnerFactoryMap
+          .getOrDefault(pTransform.getSpec().getUrn(), 
defaultPTransformRunnerFactory)
+          .createRunnerForPTransform(
+              options,
+              beamFnDataClient,
+              beamFnStateClient,
+              pTransformId,
+              pTransform,
+              processBundleInstructionId,
+              processBundleDescriptor.getPcollectionsMap(),
+              processBundleDescriptor.getCodersMap(),
+              processBundleDescriptor.getWindowingStrategiesMap(),
+              pCollectionIdsToConsumers,
+              addStartFunction,
+              addFinishFunction,
+              splitListener);
+      processedPTransformIds.add(pTransformId);
+    }
   }
 
   public BeamFnApi.InstructionResponse.Builder 
processBundle(BeamFnApi.InstructionRequest request)
@@ -213,6 +215,7 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
     SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = 
HashMultimap.create();
     ListMultimap<String, FnDataReceiver<WindowedValue<?>>> 
pCollectionIdsToConsumers =
         ArrayListMultimap.create();
+    HashSet<String> processedPTransformIds = new HashSet<>();
     List<ThrowingRunnable> startFunctions = new ArrayList<>();
     List<ThrowingRunnable> finishFunctions = new ArrayList<>();
 
@@ -271,6 +274,7 @@ private void 
createRunnerAndConsumersForPTransformRecursively(
             bundleDescriptor,
             pCollectionIdsToConsumingPTransforms,
             pCollectionIdsToConsumers,
+            processedPTransformIds,
             startFunctions::add,
             finishFunctions::add,
             splitListener);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 132528)
    Time Spent: 2h 20m  (was: 2h 10m)

> Java Fn SDK Harness skips unprocessed pCollections
> --------------------------------------------------
>
>                 Key: BEAM-5041
>                 URL: https://issues.apache.org/jira/browse/BEAM-5041
>             Project: Beam
>          Issue Type: Bug
>          Components: sdk-java-harness
>            Reporter: Ankur Goenka
>            Assignee: Ankur Goenka
>            Priority: Major
>             Fix For: 2.7.0
>
>          Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> The Java SDK harness used pCollections to keep track of computed consumers 
> [here|https://github.com/apache/beam/blob/ff95a82e461bd8319d9733be60e75992ba90cd7c/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java#L158].
>  This is incorrect: consumers are based on pTransforms, so pTransforms 
> should be used to keep track of computed consumers.
> In the case of Flatten, this creates an issue where pTransforms that have the 
> same input as the Flatten are not executed. This causes 
> [https://github.com/apache/beam/blob/ff95a82e461bd8319d9733be60e75992ba90cd7c/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/FlattenTest.java#L316]
>  to fail.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to