Updated Branches: refs/heads/master 35136cb4e -> fee4d1654
CRUNCH-262: Shorten job name if it is too long (putting "..." to the end) Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/fee4d165 Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/fee4d165 Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/fee4d165 Branch: refs/heads/master Commit: fee4d1654f791cf314b83141cef9fe99703b6477 Parents: 35136cb Author: Chao Shi <[email protected]> Authored: Thu Sep 12 10:25:00 2013 +0800 Committer: Chao Shi <[email protected]> Committed: Thu Sep 12 10:40:10 2013 +0800 ---------------------------------------------------------------------- .../crunch/impl/mr/plan/JobNameBuilder.java | 31 ++++++++++++++++++-- .../crunch/impl/mr/plan/JobPrototype.java | 17 ++++++----- .../apache/crunch/impl/mr/plan/MSCRPlanner.java | 6 ++-- .../crunch/impl/mr/plan/PlanningParameters.java | 2 ++ .../crunch/impl/mr/plan/JobNameBuilderTest.java | 31 ++++++++++++++++---- 5 files changed, 68 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java index 9ad7300..6fac1be 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobNameBuilder.java @@ -19,8 +19,10 @@ package org.apache.crunch.impl.mr.plan; import java.util.List; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; /** * Visitor that traverses the {@code DoNode} instances in a job and builds a @@ -30,12 +32,20 @@ class JobNameBuilder { private static final Joiner JOINER = Joiner.on("+"); private static final Joiner CHILD_JOINER = Joiner.on("/"); + private static final int DEFAULT_JOB_NAME_MAX_STACK_LENGTH = 60; - private String pipelineName; + private final String pipelineName; + private final int jobID; + private final int numOfJobs; List<String> rootStack = Lists.newArrayList(); + private final int maxStackNameLength; - public JobNameBuilder(final String pipelineName) { + public JobNameBuilder(Configuration conf, String pipelineName, int jobID, int numOfJobs) { this.pipelineName = pipelineName; + this.jobID = jobID; + this.numOfJobs = numOfJobs; + this.maxStackNameLength = conf.getInt( + PlanningParameters.JOB_NAME_MAX_STACK_LENGTH, DEFAULT_JOB_NAME_MAX_STACK_LENGTH); } public void visit(DoNode node) { @@ -74,6 +84,21 @@ class JobNameBuilder { } public String build() { - return String.format("%s: %s", pipelineName, JOINER.join(rootStack)); + return String.format("%s: %s (%d/%d)", + pipelineName, + shortenRootStackName(JOINER.join(rootStack), maxStackNameLength), + jobID, + numOfJobs); + } + + private static String shortenRootStackName(String s, int len) { + int n = s.length(); + if (len <= 3) { + return "..."; + } + if (n <= len) { + return s; + } + return s.substring(0, len - 3) + "..."; } } http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java index c733323..c9b7111 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/JobPrototype.java @@ -127,17 +127,19 @@ class JobPrototype { this.dependencies.add(dependency); } - public CrunchControlledJob getCrunchJob(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException { + public CrunchControlledJob getCrunchJob( + Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException { if (job == null) { - job = build(jarClass, conf, pipeline); + job = build(jarClass, conf, pipeline, numOfJobs); for (JobPrototype proto : dependencies) { - job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline)); + job.addDependingJob(proto.getCrunchJob(jarClass, conf, pipeline, numOfJobs)); } } return job; } - private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline) throws IOException { + private CrunchControlledJob build( + Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs) throws IOException { Job job = new Job(conf); conf = job.getConfiguration(); conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString()); @@ -220,7 +222,7 @@ class JobPrototype { } job.setInputFormatClass(CrunchInputFormat.class); } - job.setJobName(createJobName(pipeline.getName(), inputNodes, reduceNode)); + job.setJobName(createJobName(conf, pipeline.getName(), inputNodes, reduceNode, numOfJobs)); return new CrunchControlledJob( jobID, @@ -239,8 +241,9 @@ class JobPrototype { DistCache.write(conf, path, rtNodes); } - private String createJobName(String pipelineName, List<DoNode> mapNodes, DoNode reduceNode) { - JobNameBuilder builder = new JobNameBuilder(pipelineName); + private String createJobName( + Configuration conf, String pipelineName, List<DoNode> mapNodes, DoNode reduceNode, int numOfJobs) { + JobNameBuilder builder = new JobNameBuilder(conf, pipelineName, jobID, numOfJobs); builder.visit(mapNodes); if (reduceNode != null) { builder.visit(reduceNode); http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java index 5ad5ca1..f765313 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java @@ -113,7 +113,7 @@ public class MSCRPlanner { // job prototype a particular GBK is assigned to. Multimap<Vertex, JobPrototype> newAssignments = HashMultimap.create(); for (List<Vertex> component : components) { - newAssignments.putAll(constructJobPrototypes(component)); + newAssignments.putAll(constructJobPrototypes(component, components.size())); } // Add in the job dependency information here. @@ -154,7 +154,7 @@ public class MSCRPlanner { MRExecutor exec = new MRExecutor(jarClass, outputs, toMaterialize); for (JobPrototype proto : Sets.newHashSet(assignments.values())) { dotfileWriter.addJobPrototype(proto); - exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline)); + exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline, lastJobID)); } String planDotFile = dotfileWriter.buildDotfile(); @@ -239,7 +239,7 @@ public class MSCRPlanner { return graph; } - private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component) { + private Multimap<Vertex, JobPrototype> constructJobPrototypes(List<Vertex> component, int numOfJobs) { Multimap<Vertex, JobPrototype> assignment = HashMultimap.create(); List<Vertex> gbks = Lists.newArrayList(); for (Vertex v : component) { http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java index b90a911..d0b74b7 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java @@ -33,6 +33,8 @@ public class PlanningParameters { */ public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile"; + public static final String JOB_NAME_MAX_STACK_LENGTH = "crunch.job.name.max.stack.length"; + private PlanningParameters() { } } http://git-wip-us.apache.org/repos/asf/crunch/blob/fee4d165/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java index 0a30fa4..3ba2763 100644 --- a/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java +++ b/crunch-core/src/test/java/org/apache/crunch/impl/mr/plan/JobNameBuilderTest.java @@ -17,25 +17,44 @@ */ package org.apache.crunch.impl.mr.plan; -import static org.junit.Assert.assertEquals; - +import com.google.common.base.Strings; +import com.google.common.collect.Lists; import org.apache.crunch.types.writable.Writables; +import org.apache.hadoop.conf.Configuration; import org.junit.Test; -import com.google.common.collect.Lists; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; public class JobNameBuilderTest { + private static final Configuration CONF = new Configuration(); + @Test public void testBuild() { final String pipelineName = "PipelineName"; final String nodeName = "outputNode"; - DoNode doNode = DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings()); - JobNameBuilder jobNameBuilder = new JobNameBuilder(pipelineName); + DoNode doNode = createDoNode(nodeName); + JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1); + jobNameBuilder.visit(Lists.newArrayList(doNode)); + String jobName = jobNameBuilder.build(); + + assertEquals(String.format("%s: %s (1/1)", pipelineName, nodeName), jobName); + } + + @Test + public void testNodeNameTooLong() { + final String pipelineName = "PipelineName"; + final String nodeName = Strings.repeat("very_long_node_name", 100); + DoNode doNode = createDoNode(nodeName); + JobNameBuilder jobNameBuilder = new JobNameBuilder(CONF, pipelineName, 1, 1); jobNameBuilder.visit(Lists.newArrayList(doNode)); String jobName = jobNameBuilder.build(); - assertEquals(String.format("%s: %s", pipelineName, nodeName), jobName); + assertFalse(jobName.contains(nodeName)); // Tests that the very long node name was shorten } + private DoNode createDoNode(String nodeName) { + return DoNode.createOutputNode(nodeName, Writables.strings().getConverter(), Writables.strings()); + } }
