Repository: crunch Updated Branches: refs/heads/apache-crunch-0.8 a5c592768 -> 3a760cdae
CRUNCH-456 Abbreviate long node names in dot file Abbreviate node names down to 300 characters in job plan dot files to ensure that the output dot files are both valid and readable. Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/3a760cda Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/3a760cda Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/3a760cda Branch: refs/heads/apache-crunch-0.8 Commit: 3a760cdae913943449083d2863f7bcb40572a7cb Parents: a5c5927 Author: Gabriel Reid <[email protected]> Authored: Tue Aug 5 08:51:58 2014 +0200 Committer: Gabriel Reid <[email protected]> Committed: Tue Aug 5 17:26:00 2014 +0200 ---------------------------------------------------------------------- .../crunch/impl/mr/plan/DotfileWriter.java | 54 ++++++++++++++------ .../crunch/impl/mr/plan/DotfileWriterTest.java | 22 ++++++-- 2 files changed, 57 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/3a760cda/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java index 4d88296..de96852 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileWriter.java @@ -21,19 +21,19 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.crunch.Pair; -import org.apache.crunch.SourceTarget; -import org.apache.crunch.Target; -import org.apache.crunch.impl.dist.collect.PCollectionImpl; -import org.apache.crunch.impl.mr.collect.InputCollection; -import org.apache.crunch.impl.mr.collect.PGroupedTableImpl; - import com.google.common.base.Joiner; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import org.apache.commons.lang.StringUtils; +import org.apache.crunch.Pair; +import org.apache.crunch.SourceTarget; +import org.apache.crunch.Target; +import org.apache.crunch.impl.dist.collect.PCollectionImpl; +import org.apache.crunch.impl.mr.collect.InputCollection; +import org.apache.crunch.impl.mr.collect.PGroupedTableImpl; /** * Writes <a href="http://www.graphviz.org">Graphviz</a> dot files to illustrate @@ -41,8 +41,11 @@ import com.google.common.collect.Sets; */ public class DotfileWriter { + // Maximum length that a node name may have in the produced dot file + static final int MAX_NODE_NAME_LENGTH = 300; + /** The types of tasks within a MapReduce job. */ - enum MRTaskType { MAP, REDUCE }; + enum MRTaskType { MAP, REDUCE } private Set<JobPrototype> jobPrototypes = Sets.newHashSet(); private HashMultimap<Pair<JobPrototype, MRTaskType>, String> jobNodeDeclarations = HashMultimap.create(); @@ -61,7 +64,9 @@ public class DotfileWriter { if (pcollectionImpl instanceof InputCollection) { shape = "folder"; } - return String.format("%s [label=\"%s\" shape=%s];", formatPCollection(pcollectionImpl, jobPrototype), pcollectionImpl.getName(), + return String.format("%s [label=\"%s\" shape=%s];", + formatPCollection(pcollectionImpl, jobPrototype), + limitNodeNameLength(pcollectionImpl.getName()), shape); } @@ -72,7 +77,8 @@ public class DotfileWriter { * @return The global node declaration for the Target */ String formatTargetNodeDeclaration(Target target) { - return String.format("\"%s\" [label=\"%s\" shape=folder];", target.toString(), target.toString()); + String nodeName = limitNodeNameLength(target.toString()); + return String.format("\"%s\" [label=\"%s\" shape=folder];", nodeName, nodeName); } /** @@ -85,9 +91,11 @@ public class DotfileWriter { String formatPCollection(PCollectionImpl<?> pcollectionImpl, JobPrototype jobPrototype) { if (pcollectionImpl instanceof InputCollection) { InputCollection<?> inputCollection = (InputCollection<?>) pcollectionImpl; - return String.format("\"%s\"", inputCollection.getSource()); + return String.format("\"%s\"", limitNodeNameLength(inputCollection.getSource().toString())); } - return String.format("\"%s@%d@%d\"", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode()); + return String.format("\"%s\"", + limitNodeNameLength( + String.format("%s@%d@%d", pcollectionImpl.getName(), pcollectionImpl.hashCode(), jobPrototype.hashCode()))); } /** @@ -97,7 +105,23 @@ public class DotfileWriter { * @return The dot-formatted chain of nodes */ String formatNodeCollection(List<String> nodeCollection) { - return formatNodeCollection(nodeCollection, ImmutableMap.<String,String>of()); + return formatNodeCollection(nodeCollection, ImmutableMap.<String, String>of()); + } + + /** + * Limit a node name length down to {@link #MAX_NODE_NAME_LENGTH}, to ensure valid (and readable) dot files. If the + * name is already less than or equal to the maximum length, it will be returned untouched. + * + * @param nodeName node name to be limited in length + * @return the abbreviated node name if it was longer than the given maximum allowable length + */ + static String limitNodeNameLength(String nodeName) { + if (nodeName.length() <= MAX_NODE_NAME_LENGTH) { + return nodeName; + } + String hashString = Integer.toString(nodeName.hashCode()); + return String.format("%s@%s", + StringUtils.abbreviate(nodeName, MAX_NODE_NAME_LENGTH - (hashString.length() + 1)), hashString); } /** @@ -140,7 +164,7 @@ public class DotfileWriter { String toNode = formatPCollection(pcollection, jobPrototype); for(Target target : targetDeps) { globalNodeDeclarations.add(formatTargetNodeDeclaration(target)); - String fromNode = String.format("\"%s\"", target.toString()); + String fromNode = String.format("\"%s\"", limitNodeNameLength(target.toString())); formattedNodePaths.add( formatNodeCollection( ImmutableList.of(fromNode, toNode), @@ -210,7 +234,7 @@ public class DotfileWriter { addNodePathChain(nodePath, jobPrototype); nodePathChains.add(formatNodeCollection( Lists.newArrayList(formatPCollection(nodePath.descendingIterator().next(), jobPrototype), - String.format("\"%s\"", target.toString())))); + String.format("\"%s\"", limitNodeNameLength(target.toString()))))); } } } http://git-wip-us.apache.org/repos/asf/crunch/blob/3a760cda/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java index 4b183ac..239da53 100644 --- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java +++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/DotfileWriterTest.java @@ -18,11 +18,16 @@ package org.apache.crunch.impl.mr.plan; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.util.List; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import org.apache.crunch.ParallelDoOptions; import org.apache.crunch.Source; import org.apache.crunch.SourceTarget; @@ -34,10 +39,6 @@ import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Lists; - public class DotfileWriterTest { private DotfileWriter dotfileWriter; @@ -170,4 +171,17 @@ public class DotfileWriterTest { assertEquals("label = Reduce; color = red;", dotfileWriter.getTaskGraphAttributes(MRTaskType.REDUCE)); } + @Test + public void testLimitNodeNameLength_AlreadyWithinLimit() { + String nodeName = "within_limit"; + assertEquals(nodeName, DotfileWriter.limitNodeNameLength(nodeName)); + } + + @Test + public void testLimitNodeNameLength_OverLimit() { + String nodeName = Strings.repeat("x", DotfileWriter.MAX_NODE_NAME_LENGTH + 1); + String abbreviated = DotfileWriter.limitNodeNameLength(nodeName); + assertEquals(DotfileWriter.MAX_NODE_NAME_LENGTH, abbreviated.length()); + assertTrue(abbreviated.startsWith("xxxxx")); + } }
