[ https://issues.apache.org/jira/browse/BEAM-4834?focusedWorklogId=125651&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-125651 ]
ASF GitHub Bot logged work on BEAM-4834: ---------------------------------------- Author: ASF GitHub Bot Created on: 20/Jul/18 21:14 Start Date: 20/Jul/18 21:14 Worklog Time Spent: 10m Work Description: angoenka closed pull request #6007: [BEAM-4834] Identify loops before doing topological sort of pipeline network URL: https://github.com/apache/beam/pull/6007 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/Networks.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/Networks.java index 9f41a6663f0..702a8f8f1e3 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/Networks.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/graph/Networks.java @@ -176,6 +176,28 @@ public final NodeT apply(NodeT input) { return computeTopologicalOrder(orderedNetwork); } + /** + * Do a simple DFS traversal to find cycle in the graph. Not using Graphs.hasCycle as it does not + * provide the found cycle. + */ + private static <NodeT, EdgeT> void checkCycle(Network<NodeT, EdgeT> network) { + ArrayDeque<NodeT> parents = new ArrayDeque<>(); + HashSet<NodeT> parentsSet = new HashSet<>(); + network.nodes().forEach(node -> dfs(network, node, parents, parentsSet)); + } + + private static <NodeT, EdgeT> void dfs( + Network<NodeT, EdgeT> network, NodeT node, ArrayDeque<NodeT> parents, Set<NodeT> parentsSet) { + if (parentsSet.contains(node)) { + checkArgument(!parents.contains(node), "Network has a cycle %s on node %s", parents, node); + } + parents.addLast(node); + parentsSet.add(node); + network.successors(node).forEach(successor -> dfs(network, successor, parents, parentsSet)); + parentsSet.remove(node); + parents.removeLast(); + } + /** * Compute the topological order for a {@link Network}. * @@ -193,6 +215,7 @@ public final NodeT apply(NodeT input) { "Only networks without self loops are supported, given %s", network); + checkCycle(network); // Linked hashset will prevent duplicates from appearing and will maintain insertion order. LinkedHashSet<NodeT> nodes = new LinkedHashSet<>(network.nodes().size()); Queue<NodeT> processingOrder = new ArrayDeque<>(); diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/NetworksTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/NetworksTest.java index 2dd8834585f..20f7d9700a1 100644 --- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/NetworksTest.java +++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/graph/NetworksTest.java @@ -83,6 +83,46 @@ public void testTopologicalSort() { } } + @Test(expected = IllegalArgumentException.class) + public void testTopologicalSortWithCycle() { + MutableNetwork<String, String> network = createEmptyNetwork(); + network.addNode("A"); + network.addNode("B"); + network.addNode("C"); + network.addNode("D"); + network.addNode("E"); + + network.addEdge("A", "B", "AB"); + network.addEdge("B", "C", "BC"); + network.addEdge("C", "D", "CD"); + network.addEdge("D", "E", "DE"); + // Cycle between B -> C -> D -> + network.addEdge("D", "B", "DB"); + + Iterable<String> sortedNodes = Networks.topologicalOrder(network); + Map<String, Integer> nodeToPosition = new HashMap<>(Iterables.size(sortedNodes)); + int i = 0; + for (String node : sortedNodes) { + nodeToPosition.put(node, i); + i += 1; + } + + for (String node : network.nodes()) { + for (String descendant : + Sets.difference( + Networks.reachableNodes(network, ImmutableSet.of(node), Collections.emptySet()), + Sets.newHashSet(node))) { + assertThat( + String.format( + "Expected position of node %s to be before descendant %s," + + " order returned %s for network %s", + node, descendant, sortedNodes, network), + nodeToPosition.get(descendant), + greaterThan(nodeToPosition.get(node))); + } + } + } + @Test public void testTopologicalSortWithSuborder() { // This cast is required to narrow the type accepted by the comparator ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 125651) Time Spent: 1h 10m (was: 1h) > Validate cycles in org.apache.beam.runners.core.construction.graph.Network > before doing topological sort > -------------------------------------------------------------------------------------------------------- > > Key: BEAM-4834 > URL: https://issues.apache.org/jira/browse/BEAM-4834 > Project: Beam > Issue Type: Bug > Components: runner-core > Reporter: Ankur Goenka > Assignee: Ankur Goenka > Priority: Major > Time Spent: 1h 10m > Remaining Estimate: 0h > > Cyclic graphs will never finish the topological sort so we should check the > cycle before doing the topological sort. -- This message was sent by Atlassian JIRA (v7.6.3#76005)