Repository: crunch Updated Branches: refs/heads/master 2bd612eb9 -> a727606ea
CRUNCH-537 Deprecate PIPELINE_PLAN_DOTFILE Deprecate the method of retrieving the pipeline plan dotfile via the PlanningParameters.PIPELINE_PLAN_DOTFILE configuration key, and instead have the dotfile written to a directory (as is already supported). This commit also renames the misspelled DotfileUtills to DotfileUtil, leaving a deprecated DotfileUtills in place to handle backwards compatibility. Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/8d9c5403 Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/8d9c5403 Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/8d9c5403 Branch: refs/heads/master Commit: 8d9c54037243b10acccacf21de791ff725180e31 Parents: 2bd612e Author: Gabriel Reid <[email protected]> Authored: Thu Jul 2 22:04:00 2015 +0200 Committer: Gabriel Reid <[email protected]> Committed: Thu Jul 9 20:12:38 2015 +0200 ---------------------------------------------------------------------- .../apache/crunch/impl/mr/plan/DotfilesIT.java | 8 +- .../apache/crunch/impl/mr/plan/DotfileUtil.java | 212 +++++++++++++++++++ .../crunch/impl/mr/plan/DotfileUtills.java | 142 +------------ .../apache/crunch/impl/mr/plan/MSCRPlanner.java | 14 +- .../crunch/impl/mr/plan/PlanningParameters.java | 5 + 5 files changed, 236 insertions(+), 145 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/8d9c5403/crunch-core/src/it/java/org/apache/crunch/impl/mr/plan/DotfilesIT.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/it/java/org/apache/crunch/impl/mr/plan/DotfilesIT.java b/crunch-core/src/it/java/org/apache/crunch/impl/mr/plan/DotfilesIT.java index bfb1258..98ae8d1 100644 --- a/crunch-core/src/it/java/org/apache/crunch/impl/mr/plan/DotfilesIT.java +++ b/crunch-core/src/it/java/org/apache/crunch/impl/mr/plan/DotfilesIT.java @@ -76,7 +76,7 @@ public class 
DotfilesIT { Configuration conf = tmpDir.getDefaultConfiguration(); - DotfileUtills.setPipelineDotfileOutputDir(conf, dotfileDir.getRootFileName()); + DotfileUtil.setPipelineDotfileOutputDir(conf, dotfileDir.getRootFileName()); run(new MRPipeline(DotfilesIT.class, conf), WritableTypeFamily.getInstance()); @@ -109,8 +109,8 @@ public class DotfilesIT { Configuration conf = tmpDir.getDefaultConfiguration(); - DotfileUtills.setPipelineDotfileOutputDir(conf, dotfileDir.getRootFileName()); - DotfileUtills.enableDebugDotfiles(conf); + DotfileUtil.setPipelineDotfileOutputDir(conf, dotfileDir.getRootFileName()); + DotfileUtil.enableDebugDotfiles(conf); run(new MRPipeline(DotfilesIT.class, conf), WritableTypeFamily.getInstance()); @@ -133,7 +133,7 @@ public class DotfilesIT { Configuration conf = tmpDir.getDefaultConfiguration(); - DotfileUtills.enableDebugDotfiles(conf); + DotfileUtil.enableDebugDotfiles(conf); run(new MRPipeline(DotfilesIT.class, conf), WritableTypeFamily.getInstance()); http://git-wip-us.apache.org/repos/asf/crunch/blob/8d9c5403/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtil.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtil.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtil.java new file mode 100644 index 0000000..6475e87 --- /dev/null +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtil.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch.impl.mr.plan; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.crunch.Target; +import org.apache.crunch.impl.dist.collect.PCollectionImpl; +import org.apache.crunch.impl.mr.MRPipeline; +import org.apache.crunch.impl.mr.exec.MRExecutor; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +/** + * Helper class that manages the dotfile generation lifecycle and configuring the dotfile debug context. + */ +public class DotfileUtil { + + private static final Logger LOG = LoggerFactory.getLogger(DotfileUtil.class); + + private final Class<?> jarClass; + private final Configuration conf; + + private String rtNodesDotfile = ""; + private String basePlanGraphDotfile = ""; + private String splitGraphPlanDotfile = ""; + private String pcollectionLineageDotfile = ""; + private String planDotFile = ""; + + DotfileUtil(Class<?> jarClass, Configuration conf) { + this.jarClass = jarClass; + this.conf = conf; + } + + /** + * Builds a lineage dotfile only if the dotfile-debug mode is enabled. 
+ */ + void buildLineageDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs) { + if (isDebugDotfilesEnabled(conf)) { + try { + pcollectionLineageDotfile = new DotfileWriterPCollectionLineage(outputs) + .buildDiagram("PCollection Lineage Plan (" + jarClass.getSimpleName() + ")"); + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + } + + /** + * Builds the base graph dotfile only if the dotfile-debug mode is enabled. + */ + void buildBaseGraphDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs, Graph graph) { + if (isDebugDotfilesEnabled(conf)) { + try { + basePlanGraphDotfile = new DotfileWriterGraph(graph, outputs, null).buildDiagram("Base Graph (" + + jarClass.getSimpleName() + ")"); + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + } + + /** + * Builds a split graph dotfile only if the dotfile-debug mode is enabled. + */ + void buildSplitGraphDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs, Graph graph, List<List<Vertex>> components) { + if (isDebugDotfilesEnabled(conf)) { + try { + splitGraphPlanDotfile = new DotfileWriterGraph(graph, outputs, components) + .buildDiagram("Graph With Components (" + jarClass.getSimpleName() + ")"); + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + } + + /** + * Builds a RT node dotfile only if the dotfile-debug mode is enabled. + */ + void buildRTNodesDotfile(MRExecutor exec) { + if (isDebugDotfilesEnabled(conf)) { + try { + rtNodesDotfile = new DotfileWriterRTNodes(exec.getJobs()).buildDiagram("Run Time Plan (" + + jarClass.getSimpleName() + ")"); + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + } + + /** + * Build the plan dotfile despite of the the dotfile-debug mode. 
+ * + * @throws IOException + */ + void buildPlanDotfile(MRExecutor exec, Multimap<Target, JobPrototype> assignments, MRPipeline pipeline, int lastJobID) { + try { + DotfileWriter dotfileWriter = new DotfileWriter(); + + for (JobPrototype proto : Sets.newHashSet(assignments.values())) { + dotfileWriter.addJobPrototype(proto); + } + + planDotFile = dotfileWriter.buildDotfile(); + + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + + /** + * Attach the generated dotfiles to the {@link MRExecutor} context!. Note that the planDotFile is always added! + */ + void addDotfilesToContext(MRExecutor exec) { + try { + // The job plan is always enabled and set in the Configuration; + conf.set(PlanningParameters.PIPELINE_PLAN_DOTFILE, planDotFile); + exec.addNamedDotFile("jobplan", planDotFile); + + // Debug dotfiles are only stored if the configuration is set to enabled + if (isDebugDotfilesEnabled(conf)) { + exec.addNamedDotFile("rt_plan", rtNodesDotfile); + exec.addNamedDotFile("base_graph_plan", basePlanGraphDotfile); + exec.addNamedDotFile("split_graph_plan", splitGraphPlanDotfile); + exec.addNamedDotFile("lineage_plan", pcollectionLineageDotfile); + } + } catch (Exception ex) { + LOG.error("Problem creating debug dotfile:", ex); + } + } + + /** + * Determine if the creation of debugging dotfiles (which explain various stages in the job planning process) + * is enabled. + * <p/> + * In order for this to be <tt>true</tt>, {@link #setPipelineDotfileOutputDir(Configuration, String)} needs to also + * have been called with the same configuration object. + * <p/> + * Note that regardless of whether or not debugging dotfile creation is enabled, the high-level job plan will always + * be dumped if {@link #setPipelineDotfileOutputDir(Configuration, String)} has been called. 
+ * + * @param conf pipeline configuration + * @return <tt>true</tt> if the creation of debugging dotfiles is enabled, otherwise <tt>false</tt> + */ + public static boolean isDebugDotfilesEnabled(Configuration conf) { + return conf.getBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, false) + && conf.get(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR) != null; + } + + /** + * Enable the creation of debugging dotfiles (which explain various stages in the job planning process). + * + * @param conf pipeline configuration + */ + public static void enableDebugDotfiles(Configuration conf) { + conf.setBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, true); + } + + /** + * Disable the creation of debugging dotfiles. + * + * @param conf pipeline configuration + */ + public static void disableDebugDotfiles(Configuration conf) { + conf.setBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, false); + } + + /** + * Set an output directory where job plan dotfiles will be written. + * <p/> + * If a directory has been set, a dotfile containing the job plan will be dumped to the given directory. + * + * @param conf the pipeline configuration + * @param outputPath the path to which dotfiles are to be written, in the form of a URI + */ + public static void setPipelineDotfileOutputDir(Configuration conf, String outputPath) { + conf.set(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR, outputPath); + } + + /** + * Retrieve the path where job plan dotfiles are to be written. 
+ * + * @param conf pipeline configuration + * @return the path where job plan dotfiles are to be written, or <tt>null</tt> if the output path hasn't been set + */ + public static String getPipelineDotfileOutputDir(Configuration conf) { + return conf.get(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR); + } +} http://git-wip-us.apache.org/repos/asf/crunch/blob/8d9c5403/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtills.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtills.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtills.java index 7dea255..ff20578 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtills.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/DotfileUtills.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,158 +17,32 @@ */ package org.apache.crunch.impl.mr.plan; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.crunch.Target; -import org.apache.crunch.impl.dist.collect.PCollectionImpl; -import org.apache.crunch.impl.mr.MRPipeline; -import org.apache.crunch.impl.mr.exec.MRExecutor; import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; /** * Helper class that manages the dotfile generation lifecycle and configuring the dotfile debug context. 
+ * + * @deprecated use {@link DotfileUtil} instead */ public class DotfileUtills { - private static final Logger LOG = LoggerFactory.getLogger(DotfileUtills.class); - - private final Class<?> jarClass; - private final Configuration conf; - - private String rtNodesDotfile = ""; - private String basePlanGraphDotfile = ""; - private String splitGraphPlanDotfile = ""; - private String pcollectionLineageDotfile = ""; - private String planDotFile = ""; - - DotfileUtills(Class<?> jarClass, Configuration conf) { - this.jarClass = jarClass; - this.conf = conf; - } - - /** - * Builds a lineage dotfile only if the dotfile-debug mode is enabled. - */ - void buildLineageDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs) { - if (isDebugDotfilesEnabled(conf)) { - try { - pcollectionLineageDotfile = new DotfileWriterPCollectionLineage(outputs) - .buildDiagram("PCollection Lineage Plan (" + jarClass.getSimpleName() + ")"); - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - } - - /** - * Builds the base graph dotfile only if the dotfile-debug mode is enabled. - */ - void buildBaseGraphDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs, Graph graph) { - if (isDebugDotfilesEnabled(conf)) { - try { - basePlanGraphDotfile = new DotfileWriterGraph(graph, outputs, null).buildDiagram("Base Graph (" - + jarClass.getSimpleName() + ")"); - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - } - - /** - * Builds a split graph dotfile only if the dotfile-debug mode is enabled. 
- */ - void buildSplitGraphDotfile(Map<PCollectionImpl<?>, Set<Target>> outputs, Graph graph, List<List<Vertex>> components) { - if (isDebugDotfilesEnabled(conf)) { - try { - splitGraphPlanDotfile = new DotfileWriterGraph(graph, outputs, components) - .buildDiagram("Graph With Components (" + jarClass.getSimpleName() + ")"); - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - } - - /** - * Builds a RT node dotfile only if the dotfile-debug mode is enabled. - */ - void buildRTNodesDotfile(MRExecutor exec) { - if (isDebugDotfilesEnabled(conf)) { - try { - rtNodesDotfile = new DotfileWriterRTNodes(exec.getJobs()).buildDiagram("Run Time Plan (" - + jarClass.getSimpleName() + ")"); - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - } - - /** - * Build the plan dotfile despite of the the dotfile-debug mode. - * - * @throws IOException - */ - void buildPlanDotfile(MRExecutor exec, Multimap<Target, JobPrototype> assignments, MRPipeline pipeline, int lastJobID) { - try { - DotfileWriter dotfileWriter = new DotfileWriter(); - - for (JobPrototype proto : Sets.newHashSet(assignments.values())) { - dotfileWriter.addJobPrototype(proto); - } - - planDotFile = dotfileWriter.buildDotfile(); - - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - - /** - * Attach the generated dotfiles to the {@link MRExecutor} context!. Note that the planDotFile is always added! 
- */ - void addDotfilesToContext(MRExecutor exec) { - try { - // The job plan is always enabled and set in the Configuration; - conf.set(PlanningParameters.PIPELINE_PLAN_DOTFILE, planDotFile); - exec.addNamedDotFile("jobplan", planDotFile); - - // Debug dotfiles are only stored if the configuration is set to enabled - if (isDebugDotfilesEnabled(conf)) { - exec.addNamedDotFile("rt_plan", rtNodesDotfile); - exec.addNamedDotFile("base_graph_plan", basePlanGraphDotfile); - exec.addNamedDotFile("split_graph_plan", splitGraphPlanDotfile); - exec.addNamedDotFile("lineage_plan", pcollectionLineageDotfile); - } - } catch (Exception ex) { - LOG.error("Problem creating debug dotfile:", ex); - } - } - public static boolean isDebugDotfilesEnabled(Configuration conf) { - return conf.getBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, false) - && conf.get(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR) != null; + return DotfileUtil.isDebugDotfilesEnabled(conf); } public static void enableDebugDotfiles(Configuration conf) { - conf.setBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, true); + DotfileUtil.enableDebugDotfiles(conf); } public static void disableDebugDotfilesEnabled(Configuration conf) { - conf.setBoolean(PlanningParameters.DEBUG_DOTFILES_ENABLED, false); + DotfileUtil.disableDebugDotfiles(conf); } public static void setPipelineDotfileOutputDir(Configuration conf, String outputDir) { - conf.set(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR, outputDir); + DotfileUtil.setPipelineDotfileOutputDir(conf, outputDir); } public static String getPipelineDotfileOutputDir(Configuration conf) { - return conf.get(PlanningParameters.PIPELINE_DOTFILE_OUTPUT_DIR); + return DotfileUtil.getPipelineDotfileOutputDir(conf); } } http://git-wip-us.apache.org/repos/asf/crunch/blob/8d9c5403/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java ---------------------------------------------------------------------- diff --git 
a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java index b470586..eace851 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/MSCRPlanner.java @@ -87,10 +87,10 @@ public class MSCRPlanner { public MRExecutor plan(Class<?> jarClass, Configuration conf) throws IOException { - DotfileUtills dotfileUtills = new DotfileUtills(jarClass, conf); + DotfileUtil dotfileUtil = new DotfileUtil(jarClass, conf); // Generate the debug lineage dotfiles (if configuration is enabled) - dotfileUtills.buildLineageDotfile(outputs); + dotfileUtil.buildLineageDotfile(outputs); Map<PCollectionImpl<?>, Set<Target>> targetDeps = Maps.newTreeMap(DEPTH_COMPARATOR); for (PCollectionImpl<?> pcollect : outputs.keySet()) { @@ -137,8 +137,8 @@ public class MSCRPlanner { List<List<Vertex>> components = graph.connectedComponents(); // Generate the debug graph dotfiles (if configuration is enabled) - dotfileUtills.buildBaseGraphDotfile(outputs, graph); - dotfileUtills.buildSplitGraphDotfile(outputs, graph, components); + dotfileUtil.buildBaseGraphDotfile(outputs, graph); + dotfileUtil.buildSplitGraphDotfile(outputs, graph, components); // For each component, we will create one or more job prototypes, // depending on its profile. 
@@ -207,17 +207,17 @@ public class MSCRPlanner { MRExecutor exec = new MRExecutor(conf, jarClass, outputs, toMaterialize, appendedTargets, pipelineCallables); // Generate the debug Plan dotfiles - dotfileUtills.buildPlanDotfile(exec, assignments, pipeline, lastJobID); + dotfileUtil.buildPlanDotfile(exec, assignments, pipeline, lastJobID); for (JobPrototype proto : Sets.newHashSet(assignments.values())) { exec.addJob(proto.getCrunchJob(jarClass, conf, pipeline, lastJobID)); } // Generate the debug RTNode dotfiles (if configuration is enabled) - dotfileUtills.buildRTNodesDotfile(exec); + dotfileUtil.buildRTNodesDotfile(exec); // Attach the dotfiles to the MRExcutor context - dotfileUtills.addDotfilesToContext(exec); + dotfileUtil.addDotfilesToContext(exec); return exec; } http://git-wip-us.apache.org/repos/asf/crunch/blob/8d9c5403/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java index a40abf9..2cf3a6f 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/mr/plan/PlanningParameters.java @@ -17,6 +17,8 @@ */ package org.apache.crunch.impl.mr.plan; +import org.apache.hadoop.conf.Configuration; + /** * Collection of Configuration keys and various constants used when planning MapReduce jobs for a * pipeline. @@ -30,7 +32,10 @@ public final class PlanningParameters { /** * Configuration key under which a <a href="http://www.graphviz.org">DOT</a> file containing the * pipeline job graph is stored by the planner. 
+ * + * @deprecated use {@link DotfileUtil#setPipelineDotfileOutputDir(Configuration, String)} instead */ + @Deprecated public static final String PIPELINE_PLAN_DOTFILE = "crunch.planner.dotfile"; public static final String DEBUG_DOTFILES_ENABLED = "crunch.internals.dotfiles";
