[SYSTEMML-2202] Remove global data flow optimizer prototype (level 4) This patch removes the early prototype of the global data flow optimizer, which was written for map-reduce and never reached production-ready status. While the general idea is still viable, we would need to redesign it from scratch to make it practical and outperform basic heuristic rewrites.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b3419b09 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b3419b09 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b3419b09 Branch: refs/heads/master Commit: b3419b09fbd5d8a16c2c2f66bc4a1838539be051 Parents: 140f1e1 Author: Matthias Boehm <mboe...@gmail.com> Authored: Fri Mar 23 17:52:54 2018 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Fri Mar 23 22:18:24 2018 -0700 ---------------------------------------------------------------------- pom.xml | 1 - .../java/org/apache/sysml/api/DMLScript.java | 10 - .../sysml/api/mlcontext/ScriptExecutor.java | 20 - .../sysml/hops/globalopt/GDFEnumOptimizer.java | 568 ------------------- .../sysml/hops/globalopt/GlobalOptimizer.java | 47 -- .../hops/globalopt/GlobalOptimizerWrapper.java | 118 ---- .../hops/globalopt/InterestingProperties.java | 115 ---- .../sysml/hops/globalopt/MemoStructure.java | 96 ---- .../org/apache/sysml/hops/globalopt/Plan.java | 225 -------- .../apache/sysml/hops/globalopt/PlanSet.java | 149 ----- .../sysml/hops/globalopt/RewriteConfig.java | 94 --- .../apache/sysml/hops/globalopt/Summary.java | 101 ---- .../globalopt/gdfgraph/GDFCrossBlockNode.java | 95 ---- .../sysml/hops/globalopt/gdfgraph/GDFGraph.java | 49 -- .../hops/globalopt/gdfgraph/GDFLoopNode.java | 66 --- .../sysml/hops/globalopt/gdfgraph/GDFNode.java | 165 ------ .../hops/globalopt/gdfgraph/GraphBuilder.java | 300 ---------- .../gdfresolve/GDFMismatchHeuristic.java | 48 -- .../GDFMismatchHeuristicBlocksizeOrFirst.java | 49 -- .../gdfresolve/GDFMismatchHeuristicFirst.java | 38 -- .../gdfresolve/MismatchHeuristicFactory.java | 41 -- .../java/org/apache/sysml/utils/Explain.java | 116 ---- .../functions/gdfo/GDFOLinregCG.java | 171 ------ .../functions/gdfo/GDFOLinregDS.java | 166 ------ .../functions/gdfo/GDFOLinregDSsimpl.java | 166 ------ .../functions/gdfo/GDFOMMChainLoop.java | 132 ----- .../gdfo/HashInterestingPropertiesTest.java | 96 ---- .../integration/mlcontext/MLContextTest.java | 10 +- src/test/scripts/functions/gdfo/LinregCG.R | 57 -- src/test/scripts/functions/gdfo/LinregCG.dml | 56 -- src/test/scripts/functions/gdfo/LinregDS.R | 43 -- src/test/scripts/functions/gdfo/LinregDS.dml | 39 -- src/test/scripts/functions/gdfo/LinregDSsimpl.R | 35 -- .../scripts/functions/gdfo/LinregDSsimpl.dml | 31 - src/test/scripts/functions/gdfo/MMChainLoop.R | 37 -- src/test/scripts/functions/gdfo/MMChainLoop.dml | 37 -- .../gdfo/SystemML-config-globalopt.xml | 26 - .../functions/gdfo/ZPackageSuite.java | 40 -- 38 files changed, 2 insertions(+), 3651 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index b79f119..5dfaf11 100644 --- a/pom.xml +++ b/pom.xml @@ -442,7 +442,6 @@ <include>**/integration/applications/**/*Suite.java</include> <include>**/integration/conversion/*Suite.java</include> <include>**/integration/functions/data/*Suite.java</include> - <include>**/integration/functions/gdfo/*Suite.java</include> <include>**/integration/functions/sparse/*Suite.java</include> <include>**/integration/functions/codegenalg/*Suite.java</include> <include>**/integration/functions/**/*Test*.java</include> http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/api/DMLScript.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java index 7a44838..7e5e1f3 100644 --- a/src/main/java/org/apache/sysml/api/DMLScript.java +++ b/src/main/java/org/apache/sysml/api/DMLScript.java @@ -64,8 +64,6 @@ import org.apache.sysml.debug.DMLDebuggerException; import org.apache.sysml.debug.DMLDebuggerProgramInfo; import org.apache.sysml.hops.HopsException; import org.apache.sysml.hops.OptimizerUtils; -import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel; -import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper; import org.apache.sysml.lops.Lop; import org.apache.sysml.lops.LopsException; import org.apache.sysml.parser.DMLProgram; @@ -731,14 +729,6 @@ public class DMLScript //Step 7: generate runtime program, incl codegen Program rtprog = dmlt.getRuntimeProgram(prog, dmlconf); - //Step 8: [optional global data flow optimization] - if(OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY) ) - { - LOG.warn("Optimization level '" + OptimizationLevel.O4_GLOBAL_TIME_MEMORY + "' " + - "is still in experimental state and not intended for production use."); - rtprog = GlobalOptimizerWrapper.optimizeProgram(prog, rtprog); - } - //launch SystemML appmaster (if requested and not already in launched AM) if( dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER) ){ if( !isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog) ) http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java index 9b0dfc8..1fa9e4e 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java @@ -37,8 +37,6 @@ import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.hops.HopsException; import org.apache.sysml.hops.OptimizerUtils; -import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel; -import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper; import org.apache.sysml.hops.rewrite.ProgramRewriter; import org.apache.sysml.hops.rewrite.RewriteRemovePersistentReadWrite; import org.apache.sysml.lops.LopsException; @@ -337,7 +335,6 @@ public class ScriptExecutor { constructLops(); generateRuntimeProgram(); showExplanation(); - globalDataFlowOptimization(); countCompiledMRJobsAndSparkInstructions(); initializeCachingAndScratchSpace(); cleanupRuntimeProgram(); @@ -477,23 +474,6 @@ public class ScriptExecutor { } /** - * Optimize the program. - */ - protected void globalDataFlowOptimization() { - if (OptimizerUtils.isOptLevel(OptimizationLevel.O4_GLOBAL_TIME_MEMORY)) { - try { - runtimeProgram = GlobalOptimizerWrapper.optimizeProgram(dmlProgram, runtimeProgram); - } catch (DMLRuntimeException e) { - throw new MLContextException("Exception occurred during global data flow optimization", e); - } catch (HopsException e) { - throw new MLContextException("Exception occurred during global data flow optimization", e); - } catch (LopsException e) { - throw new MLContextException("Exception occurred during global data flow optimization", e); - } - } - } - - /** * Parse the script into an ANTLR parse tree, and convert this parse tree * into a SystemML program. Parsing includes lexical/syntactic analysis. */ http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/GDFEnumOptimizer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/GDFEnumOptimizer.java b/src/main/java/org/apache/sysml/hops/globalopt/GDFEnumOptimizer.java deleted file mode 100644 index 50284b1..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/GDFEnumOptimizer.java +++ /dev/null @@ -1,568 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import java.util.ArrayList; -import java.util.HashMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.sysml.conf.ConfigurationManager; -import org.apache.sysml.hops.DataOp; -import org.apache.sysml.hops.Hop; -import org.apache.sysml.hops.Hop.DataOpTypes; -import org.apache.sysml.hops.HopsException; -import org.apache.sysml.hops.Hop.FileFormatTypes; -import org.apache.sysml.hops.OptimizerUtils; -import org.apache.sysml.hops.cost.CostEstimationWrapper; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFGraph; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFLoopNode; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode.NodeType; -import org.apache.sysml.hops.globalopt.gdfresolve.GDFMismatchHeuristic; -import org.apache.sysml.hops.globalopt.gdfresolve.GDFMismatchHeuristic.MismatchHeuristicType; -import org.apache.sysml.hops.globalopt.gdfresolve.MismatchHeuristicFactory; -import org.apache.sysml.hops.rewrite.HopRewriteUtils; -import org.apache.sysml.hops.recompile.Recompiler; -import org.apache.sysml.hops.recompile.Recompiler.ResetType; -import org.apache.sysml.lops.LopsException; -import org.apache.sysml.lops.LopProperties.ExecType; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.controlprogram.LocalVariableMap; -import org.apache.sysml.runtime.controlprogram.Program; -import org.apache.sysml.runtime.controlprogram.ProgramBlock; -import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; -import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory; -import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; - -/** - * Global data flow optimization via enumeration-based optimizer (dynamic programming). - * - * - * ADDITIONAL PERFORMANCE OPT (once everything is completely working) - * TODO cache for interesting properties - * TODO partial runtime plan generation - * TODO partial runtime plan costing - * - */ -public class GDFEnumOptimizer extends GlobalOptimizer -{ - - private static final Log LOG = LogFactory.getLog(GDFEnumOptimizer.class); - - //internal configuration parameters - //note: that branch and bound pruning is invalid if we cost entire programs - private static final boolean BRANCH_AND_BOUND_PRUNING = true; - private static final boolean PREFERRED_PLAN_SELECTION = true; - private static final boolean COST_FULL_PROGRAMS = false; - private static final boolean ENUM_CP_BLOCKSIZES = false; - private static final MismatchHeuristicType DEFAULT_MISMATCH_HEURISTIC = MismatchHeuristicType.FIRST; - - //internal configuration parameters - private static final int[] BLOCK_SIZES = new int[]{ 1024,//1 * DMLTranslator.DMLBlockSize, - 2048,//2 * DMLTranslator.DMLBlockSize, - 4096};//4 * DMLTranslator.DMLBlockSize}; - private static final double BRANCH_AND_BOUND_REL_THRES = Math.pow(10, -5); - //private static final int[] REPLICATION_FACTORS = new int[]{1,3,5}; - - private MemoStructure _memo = null; //plan memoization table - private static GDFMismatchHeuristic _resolve = null; - private static long _enumeratedPlans = 0; - private static long _prunedInvalidPlans = 0; - private static long _prunedSuboptimalPlans = 0; - private static long _compiledPlans = 0; - private static long _costedPlans = 0; - private static long _planMismatches = 0; - - - public GDFEnumOptimizer( ) - throws DMLRuntimeException - { - //init internal memo structure - _memo = new MemoStructure(); - - //init mismatch heuristic - _resolve = MismatchHeuristicFactory.createMismatchHeuristic( - DEFAULT_MISMATCH_HEURISTIC); - } - - @Override - public GDFGraph optimize(GDFGraph gdfgraph, Summary summary) - throws DMLRuntimeException, HopsException, LopsException - { - Timing time = new Timing(true); - - Program prog = gdfgraph.getRuntimeProgram(); - ExecutionContext ec = ExecutionContextFactory.createContext(prog); - ArrayList<GDFNode> roots = gdfgraph.getGraphRootNodes(); - - //Step 1: baseline costing for branch and bound costs - double initCosts = Double.MAX_VALUE; - if( BRANCH_AND_BOUND_PRUNING ) { - initCosts = CostEstimationWrapper.getTimeEstimate(prog, ec); - initCosts = initCosts * (1+BRANCH_AND_BOUND_REL_THRES); - } - - //Step 2: dynamic programming plan generation - //(finally, pick optimal root plans over all interesting property sets) - ArrayList<Plan> rootPlans = new ArrayList<>(); - for( GDFNode node : roots ) { - PlanSet ps = enumOpt(node, _memo, initCosts); - Plan optPlan = ps.getPlanWithMinCosts(); - rootPlans.add( optPlan ); - } - long enumPlanMismatch = getPlanMismatches(); - - //check for final containment of independent roots and pick optimal - HashMap<Long, Plan> memo = new HashMap<>(); - resetPlanMismatches(); - for( Plan p : rootPlans ) - rSetRuntimePlanConfig(p, memo); - long finalPlanMismatch = getPlanMismatches(); - - //generate final runtime plan (w/ optimal config) - Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), - new LocalVariableMap(), 0, ResetType.NO_RESET); - - ec = ExecutionContextFactory.createContext(prog); - double optCosts = CostEstimationWrapper.getTimeEstimate(prog, ec); - - //maintain optimization summary statistics - summary.setCostsInitial( initCosts ); - summary.setCostsOptimal( optCosts ); - summary.setNumEnumPlans( _enumeratedPlans ); - summary.setNumPrunedInvalidPlans( _prunedInvalidPlans ); - summary.setNumPrunedSuboptPlans( _prunedSuboptimalPlans ); - summary.setNumCompiledPlans( _compiledPlans ); - summary.setNumCostedPlans( _costedPlans ); - summary.setNumEnumPlanMismatch( enumPlanMismatch ); - summary.setNumFinalPlanMismatch( finalPlanMismatch ); - summary.setTimeOptim( time.stop() ); - - return gdfgraph; - } - - /** - * Core dynamic programming enumeration algorithm - * for global data flow optimization. - * - * @param node the GDF node - * @param memo the memo structure - * @param maxCosts max costs - * @return the plan set - * @throws DMLRuntimeException if DMLRuntimeException occurs - */ - public static PlanSet enumOpt( GDFNode node, MemoStructure memo, double maxCosts ) - throws DMLRuntimeException - { - //memoization of already enumerated subgraphs - if( memo.constainsEntry(node) ) - return memo.getEntry(node); - - //enumerate node plans - PlanSet P = enumNodePlans( node, memo, maxCosts ); - //System.out.println("Plans after enumNodePlan:\n"+P.toString()); - - //combine local node plan with optimal child plans - for( GDFNode c : node.getInputs() ) - { - //recursive optimization - PlanSet Pc = enumOpt( c, memo, maxCosts ); - if( c instanceof GDFLoopNode ) - Pc = Pc.selectChild( node ); - - //combine parent-child plans - P = P.crossProductChild(Pc); - _enumeratedPlans += P.size(); - - //prune invalid plans - pruneInvalidPlans( P ); - } - - //prune suboptimal plans - pruneSuboptimalPlans( P, maxCosts ); - - //memoization of created entries - memo.putEntry(node, P); - - return P; - } - - private static PlanSet enumNodePlans( GDFNode node, MemoStructure memo, double maxCosts ) - throws DMLRuntimeException - { - ArrayList<Plan> plans = new ArrayList<>(); - ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; - - //ENUMERATE HOP PLANS - // CASE 1: core hop enumeration (other than persistent/transient read/write) - if( node.getNodeType() == NodeType.HOP_NODE && !(node.getHop() instanceof DataOp ) ) - { - //core rewrite enumeration for cp and mr - enumHopNodePlans(node, plans); - } - //CASE 2: dataop hop enumeration - else if( node.getHop() instanceof DataOp ) - { - DataOp dhop = (DataOp)node.getHop(); - - if( dhop.getDataOpType()==DataOpTypes.PERSISTENTREAD ) - { - //for persistent read the interesting properties are fixed by the input - //but we can decide on output properties - ExecType et = (dhop.getMemEstimate()>OptimizerUtils.getLocalMemBudget() - || HopRewriteUtils.alwaysRequiresReblock(dhop)) ? - CLUSTER : ExecType.CP; - - int[] blocksizes = (et == CLUSTER) ? BLOCK_SIZES : new int[]{BLOCK_SIZES[0]}; - for( Integer bs : blocksizes ) - { - RewriteConfig rcmr = new RewriteConfig(et, bs, FileFormatTypes.BINARY); - InterestingProperties ipsmr = rcmr.deriveInterestingProperties(); - Plan mrplan = new Plan(node, ipsmr, rcmr, null); - plans.add( mrplan ); - } - } - else if( dhop.getDataOpType()==DataOpTypes.PERSISTENTWRITE ) - { - //for persistent write the interesting properties are fixed by the given - //write specification - ExecType et = (dhop.getMemEstimate()>OptimizerUtils.getLocalMemBudget()) ? - CLUSTER : ExecType.CP; - - RewriteConfig rcmr = new RewriteConfig(et, (int)dhop.getRowsInBlock(), dhop.getInputFormatType()); - InterestingProperties ipsmr = rcmr.deriveInterestingProperties(); - Plan mrplan = new Plan(node, ipsmr, rcmr, null); - plans.add( mrplan ); - } - else if( dhop.getDataOpType()==DataOpTypes.TRANSIENTREAD - || dhop.getDataOpType()==DataOpTypes.TRANSIENTWRITE) - { - //note: full enumeration for transient read and write; otherwise the properties - //of these hops are never set because pass-through plans refer to different hops - enumHopNodePlans(node, plans); - } - } - //ENUMERATE LOOP PLANS - else if( node.getNodeType() == NodeType.LOOP_NODE ) - { - //TODO consistency checks inputs and outputs (updated vars) - - GDFLoopNode lnode = (GDFLoopNode) node; - - //step 0: recursive call optimize on inputs - //no additional pruning (validity, optimality) required - for( GDFNode in : lnode.getLoopInputs().values() ) - enumOpt(in, memo, maxCosts); - - //step 1: enumerate loop plan, incl partitioning/checkpoints/reblock for inputs - RewriteConfig rc = new RewriteConfig(ExecType.CP, -1, null); - InterestingProperties ips = rc.deriveInterestingProperties(); - Plan lplan = new Plan(node, ips, rc, null); - plans.add( lplan ); - - //step 2: recursive call optimize on predicate - //(predicate might be null if single variable) - if( lnode.getLoopPredicate() != null ) - enumOpt(lnode.getLoopPredicate(), memo, maxCosts); - - //step 3: recursive call optimize on outputs - //(return union of all output plans, later selected by output var) - PlanSet Pout = new PlanSet(); - for( GDFNode out : lnode.getLoopOutputs().values() ) - Pout = Pout.union( enumOpt(out, memo, maxCosts) ); - plans.addAll(Pout.getPlans()); - - //note: global pruning later done when returning to enumOpt - //for the entire loop node - } - //CREATE DUMMY CROSSBLOCK PLAN - else if( node.getNodeType() == NodeType.CROSS_BLOCK_NODE ) - { - //do nothing (leads to pass-through on crossProductChild) - } - - return new PlanSet(plans); - } - - private static void enumHopNodePlans(GDFNode node, ArrayList<Plan> plans) - { - ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; - - //create cp plan, if allowed (note: most interesting properties are irrelevant for CP) - if( node.getHop().getMemEstimate() < OptimizerUtils.getLocalMemBudget() ) { - int[] bstmp = ENUM_CP_BLOCKSIZES ? BLOCK_SIZES : new int[]{BLOCK_SIZES[0]}; - for( Integer bs : bstmp ) { - RewriteConfig rccp = new RewriteConfig(ExecType.CP, bs, FileFormatTypes.BINARY); - InterestingProperties ipscp = rccp.deriveInterestingProperties(); - Plan cpplan = new Plan(node, ipscp, rccp, null); - plans.add( cpplan ); - } - } - - //create mr plans, if required - if( node.requiresMREnumeration() ) { - for( Integer bs : BLOCK_SIZES ) - { - RewriteConfig rcmr = new RewriteConfig(CLUSTER, bs, FileFormatTypes.BINARY); - InterestingProperties ipsmr = rcmr.deriveInterestingProperties(); - Plan mrplan = new Plan(node, ipsmr, rcmr, null); - plans.add( mrplan ); - } - } - } - - private static void pruneInvalidPlans( PlanSet plans ) - { - ArrayList<Plan> valid = new ArrayList<>(); - - //check each plan in planset for validity - for( Plan plan : plans.getPlans() ) - { - //a) check matching blocksizes if operation in MR - if( !plan.checkValidBlocksizesInMR() ) { - //System.out.println("pruned invalid blocksize mr op"); - continue; - } - - //b) check matching blocksizes of tread/twrite pairs - if( !plan.checkValidBlocksizesTRead() ) { - //System.out.println("pruned invalid blocksize tread"); - continue; - } - - //c) check valid format in MR - if( !plan.checkValidFormatInMR() ) { - //System.out.println("pruned invalid format: "+plan.getNode().getHop().getClass()); - continue; - } - - //d) check valid execution type per hop (e.g., function, reblock) - if( !plan.checkValidExecutionType() ) { - //System.out.println("pruned invalid execution type: "+plan.getNode().getHop().getClass()); - continue; - } - - valid.add( plan ); - } - - //debug output - int sizeBefore = plans.size(); - int sizeAfter = valid.size(); - _prunedInvalidPlans += (sizeBefore-sizeAfter); - LOG.debug("Pruned invalid plans: "+sizeBefore+" --> "+sizeAfter); - - plans.setPlans( valid ); - } - - private static void pruneSuboptimalPlans( PlanSet plans, double maxCosts ) - throws DMLRuntimeException - { - //costing of all plans incl containment check - for( Plan p : plans.getPlans() ) { - p.setCosts( costRuntimePlan(p) ); - } - - //build and probe for optimal plans (hash-groupby on IPC, min costs) - HashMap<InterestingProperties, Plan> probeMap = new HashMap<>(); - for( Plan p : plans.getPlans() ) - { - //max cost pruning filter (branch-and-bound) - if( BRANCH_AND_BOUND_PRUNING && p.getCosts() > maxCosts ) { - continue; - } - - //plan cost per IPS pruning filter (allow smaller or equal costs) - Plan best = probeMap.get(p.getInterestingProperties()); - if( best!=null && p.getCosts() > best.getCosts() ) { - continue; - } - - //non-preferred plan pruning filter (allow smaller cost or equal cost and preferred plan) - if( PREFERRED_PLAN_SELECTION && best!=null && - p.getCosts() == best.getCosts() && !p.isPreferredPlan() ) { - continue; - } - - //add plan as best per IPS - probeMap.put(p.getInterestingProperties(), p); - - } - - //copy over plans per IPC into one plan set - ArrayList<Plan> optimal = new ArrayList<>(probeMap.values()); - - int sizeBefore = plans.size(); - int sizeAfter = optimal.size(); - _prunedSuboptimalPlans += (sizeBefore-sizeAfter); - LOG.debug("Pruned suboptimal plans: "+sizeBefore+" --> "+sizeAfter); - - plans.setPlans(optimal); - } - - private static double costRuntimePlan(Plan p) - throws DMLRuntimeException - { - Program prog = p.getNode().getProgram(); - if( prog == null ) - throw new DMLRuntimeException("Program not available for runtime plan costing."); - - //put data flow configuration into program - rSetRuntimePlanConfig(p, new HashMap<Long,Plan>()); - - double costs = -1; - if( COST_FULL_PROGRAMS || - (p.getNode().getHop()==null || p.getNode().getProgramBlock()==null) ) - { - //recompile entire runtime program - Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), - new LocalVariableMap(), 0, ResetType.NO_RESET); - _compiledPlans++; - - //cost entire runtime program - ExecutionContext ec = ExecutionContextFactory.createContext(prog); - costs = CostEstimationWrapper.getTimeEstimate(prog, ec); - } - else - { - Hop currentHop = p.getNode().getHop(); - ProgramBlock pb = p.getNode().getProgramBlock(); - - //keep the old dag roots - ArrayList<Hop> oldRoots = pb.getStatementBlock().getHops(); - Hop tmpHop = null; - if( !(currentHop instanceof DataOp && ((DataOp)currentHop).isWrite()) ){ - ArrayList<Hop> newRoots = new ArrayList<>(); - tmpHop = new DataOp("_tmp", currentHop.getDataType(), currentHop.getValueType(), currentHop, DataOpTypes.TRANSIENTWRITE, "tmp"); - tmpHop.setVisited(); //ensure recursive visitstatus reset on recompile - newRoots.add(tmpHop); - pb.getStatementBlock().setHops(newRoots); - } - - //recompile modified runtime program - Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), - new LocalVariableMap(), 0, ResetType.NO_RESET); - _compiledPlans++; - - //cost partial runtime program up to current hop - ExecutionContext ec = ExecutionContextFactory.createContext(prog); - costs = CostEstimationWrapper.getTimeEstimate(prog, ec); - - //restore original hop dag - if( tmpHop !=null ) - HopRewriteUtils.removeChildReference(tmpHop, currentHop); - pb.getStatementBlock().setHops(oldRoots); - } - - //release forced data flow configuration from program - rResetRuntimePlanConfig(p, new HashMap<Long,Plan>()); - _costedPlans++; - - return costs; - } - - private static void rSetRuntimePlanConfig( Plan p, HashMap<Long, Plan> memo ) - { - ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; - - //basic memoization including containment check - if( memo.containsKey(p.getNode().getID()) ) { - Plan pmemo = memo.get(p.getNode().getID()); - if( !p.getInterestingProperties().equals( pmemo.getInterestingProperties()) ) - { - //replace plan in memo with new plan - //TODO this would require additional cleanup in special cases - if( _resolve.resolveMismatch(pmemo.getRewriteConfig(), p.getRewriteConfig()) ) - memo.put(p.getNode().getID(), p); - //logging of encounter plan mismatch - LOG.warn("Configuration mismatch on shared node ("+p.getNode().getHop().getHopID()+"). Falling back to heuristic '"+_resolve.getName()+"'."); - LOG.warn(p.getInterestingProperties().toString()); - LOG.warn(memo.get(p.getNode().getID()).getInterestingProperties()); - _planMismatches++; - return; - } - } - - //set plan configuration - Hop hop = p.getNode().getHop(); - if( hop!=null ) - { - RewriteConfig rc = p.getRewriteConfig(); - //set exec type - hop.setForcedExecType(rc.getExecType()); - //set blocksizes and reblock - hop.setRowsInBlock(rc.getBlockSize()); - hop.setColsInBlock(rc.getBlockSize()); - if( rc.getExecType()==CLUSTER ) //after blocksize update - { - //TODO double check dataop condition - side effect from plan validity - boolean reblock = HopRewriteUtils.alwaysRequiresReblock(hop) || - (hop.hasMatrixInputWithDifferentBlocksizes() && !(hop instanceof DataOp)); - hop.setRequiresReblock(reblock); - } - else - hop.setRequiresReblock(false); - } - - //process childs - if( p.getChilds() != null ) - for( Plan c : p.getChilds() ) - rSetRuntimePlanConfig(c, memo); - - //memoization (mark as processed) - memo.put(p.getNode().getID(), p); - } - - private static void rResetRuntimePlanConfig( Plan p, HashMap<Long, Plan> memo ) - { - //basic memoization including containment check - if( memo.containsKey(p.getNode().getID()) ) { - return; - } - - //release forced plan configuration - Hop hop = p.getNode().getHop(); - if( hop!=null ) - { - hop.setForcedExecType(null); - hop.setRowsInBlock(ConfigurationManager.getBlocksize()); - hop.setColsInBlock(ConfigurationManager.getBlocksize()); - if( !HopRewriteUtils.alwaysRequiresReblock(hop) ) { - hop.setRequiresReblock(false); - } - } - - //process childs - if( p.getChilds() != null ) - for( Plan c : p.getChilds() ) - rResetRuntimePlanConfig(c, memo); - - //memoization (mark as processed) - memo.put(p.getNode().getID(), p); - } - - private static long getPlanMismatches(){ - return _planMismatches; - } - - private static void resetPlanMismatches(){ - _planMismatches = 0; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizer.java b/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizer.java deleted file mode 100644 index f2c6748..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizer.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import org.apache.sysml.hops.HopsException; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFGraph; -import org.apache.sysml.lops.LopsException; -import org.apache.sysml.runtime.DMLRuntimeException; - -/** - * Super class for all optimizers (e.g., transformation-based, and enumeration-based) - * - */ -public abstract class GlobalOptimizer -{ - - /** - * Core optimizer call, to be implemented by an instance of a global - * data flow optimizer. - * - * @param gdfgraph the GDF graph - * @param summary the summaruy - * @return the GDF graph - * @throws DMLRuntimeException if DMLRuntimeException occurs - * @throws HopsException if HopsException occurs - * @throws LopsException if LopsException occurs - */ - public abstract GDFGraph optimize( GDFGraph gdfgraph, Summary summary ) - throws DMLRuntimeException, HopsException, LopsException; -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizerWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizerWrapper.java b/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizerWrapper.java deleted file mode 100644 index 9d716b6..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/GlobalOptimizerWrapper.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import org.apache.sysml.hops.HopsException; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFGraph; -import org.apache.sysml.hops.globalopt.gdfgraph.GraphBuilder; -import org.apache.sysml.lops.LopsException; -import org.apache.sysml.parser.DMLProgram; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.controlprogram.Program; -import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; -import org.apache.sysml.utils.Explain; - -/** - * Main entry point for Global Data Flow Optimization. It is intended to be invoked after - * the initial runtime program was created (and thus with constructed hops and lops). - * - * - */ -public class GlobalOptimizerWrapper -{ - - private static final Log LOG = LogFactory.getLog(GlobalOptimizerWrapper.class); - private static final boolean LDEBUG = true; //local debug flag - - //supported optimizers - public enum GlobalOptimizerType{ - ENUMERATE_DP, - TRANSFORM - } - - //internal parameters - private static final GlobalOptimizerType OPTIM = GlobalOptimizerType.ENUMERATE_DP; - - static - { - // for internal debugging only - if( LDEBUG ) { - Logger.getLogger("org.apache.sysml.hops.globalopt") - .setLevel((Level) Level.DEBUG); - } - } - - public static Program optimizeProgram(DMLProgram prog, Program rtprog) - throws DMLRuntimeException, HopsException, LopsException - { - LOG.debug("Starting global data flow optimization."); - Timing time = new Timing(true); - - //create optimizer instance - GlobalOptimizer optimizer = createGlobalOptimizer( OPTIM ); - - //create global data flow graph - Summary summary = new Summary(); - GDFGraph graph = GraphBuilder.constructGlobalDataFlowGraph(rtprog, summary); - if( LOG.isDebugEnabled() ) { - LOG.debug("EXPLAIN GDFGraph:\n" + Explain.explainGDFNodes(graph.getGraphRootNodes(),1)); - } - - //core global data flow optimization - graph = optimizer.optimize(graph, summary); - - //get the final runtime program - rtprog = graph.getRuntimeProgram(); - - //print global optimizer summary - LOG.info( summary ); - - LOG.debug("Finished global data flow optimization in " + time.stop() + " ms."); - return rtprog; - } - - private static GlobalOptimizer createGlobalOptimizer( GlobalOptimizerType type ) - throws HopsException, DMLRuntimeException - { - GlobalOptimizer optimizer = null; - - switch( type ) - { - case ENUMERATE_DP: - optimizer = new GDFEnumOptimizer(); - break; - - //case TRANSFORM: - // optimizer = new GlobalTransformationOptimizer(Strategy.CANONICAL); - // ((GlobalTransformationOptimizer)optimizer).addRule(new BlockSizeRule()); - // break; - - default: - throw new HopsException("Unsupported global optimizer type: "+type+"."); - } - - return optimizer; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/InterestingProperties.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/InterestingProperties.java b/src/main/java/org/apache/sysml/hops/globalopt/InterestingProperties.java deleted file mode 100644 index ce641f5..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/InterestingProperties.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import java.util.Arrays; - -/** - * An instance of this class represents one 'interesting property set' defined by the instances - * of all interesting properties. We do not use objects per interesting property in order to - * (1) simplify the creation and interesting property sets, and (2) prevent excessive object - * creation and garbage collection. - * - */ -public class InterestingProperties -{ - - public enum Location { - MEM, - HDFS_CACHE, - HDFS - } - - public enum Format { - ANY, - BINARY_BLOCK, - BINARY_CELL, - TEXT_CELL, - TEXT_MM, - TEXT_CSV - } - - public enum Partitioning { - NONE, - ROW_WISE, - COL_WISE - //ROW_BLOCK_WISE, - //COL_BLOCK_WISE, - } - - //supported interesting properties - private int _blocksize = -1; //-1 for any - private Format _format = null; - private Location _location = null; - private Partitioning _pformat = null; - private int _replication = -1; - private boolean _emptyblocks = false; - - - public InterestingProperties( int blocksize, Format format, Location location, Partitioning pformat, int replication, boolean emptyblocks ) - { - _blocksize = blocksize; - _format = format; - _location = location; - _pformat = pformat; - _replication = replication; - _emptyblocks = emptyblocks; - } - - public InterestingProperties( InterestingProperties that ) - { - _blocksize = that._blocksize; - _format = that._format; - _location = that._location; - _pformat = that._pformat; - _replication = that._replication; - _emptyblocks = that._emptyblocks; - } - - @Override - public boolean equals(Object o) - { - if( !(o instanceof InterestingProperties) ) - return false; - - InterestingProperties that = (InterestingProperties)o; - return ( _blocksize == that._blocksize - && _format == that._format - && _location == that._location - && _pformat == that._pformat - && _replication == that._replication - && _emptyblocks == that._emptyblocks ); - } - - @Override - public int hashCode() - { - Object[] array = new Object[] { _blocksize, (_format != null) ? _format.ordinal() : -1, - (_location != null) ? _location.ordinal() : -1, (_pformat != null) ? _pformat.ordinal() : -1, - _replication, _emptyblocks }; - return Arrays.hashCode(array); - } - - @Override - public String toString() - { - return "IPS[" + _blocksize + "," + _format + "," + _location + "," + _pformat + "," + _replication + "," + _emptyblocks + "]"; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/MemoStructure.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/MemoStructure.java b/src/main/java/org/apache/sysml/hops/globalopt/MemoStructure.java deleted file mode 100644 index 056db69..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/MemoStructure.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import java.util.HashMap; -import java.util.Map.Entry; - -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode; - -/** - * This MemoStructure is the central location for storing enumerated plans (configurations) and serves - * two purposes: - * - * 1) Plan Memoization: Due to the DAG structure (where a single node is reachable over alternative - * paths), our top-down, recursive optimization procedure might visit an operator multiple times. - * The memo structure memoizes and reuses already generated plans. - * - * 2) config set cache - * - * The internal structure is as follows: - * Memo Structure := | LONG GDFNodeID | MemoEntry | * - * Memo Entry := | InterestingPropertySet | DOUBLE COST | * - * - */ -public class MemoStructure -{ - - private HashMap<Long, PlanSet> _entries = null; //NODEID | PLANSET - private HashMap<Long, Long> _nodeIDs = null; //NODEID | HOPID - - public MemoStructure() - { - _entries = new HashMap<>(); - _nodeIDs = new HashMap<>(); - } - - /////////////////////////////////////////////////// - // basic access to memo structure entries - - public boolean constainsEntry( GDFNode node ) - { - return _entries.containsKey( node.getID() ); - } - - public void putEntry( GDFNode node, PlanSet entry ) - { - _entries.put( node.getID(), entry ); - if( node.getHop()!=null ) - _nodeIDs.put( node.getID(), node.getHop().getHopID() ); - else - _nodeIDs.put( node.getID(), -1L ); - } - - public PlanSet getEntry( GDFNode node ) - { - return _entries.get( node.getID()) ; - } - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append("------------------------------------\n"); - sb.append("MEMO STRUCTURE (gdfnodeid, plans): \n"); - sb.append("------------------------------------\n"); - - for( Entry<Long, PlanSet> e : _entries.entrySet() ) - { - sb.append("------------------------------------\n"); - sb.append("Node "+e.getKey()+" (hop "+_nodeIDs.get(e.getKey())+"):\n"); - for( Plan p : e.getValue().getPlans() ) { - sb.append(p.toString()); - sb.append("\n"); - } - } - - return sb.toString(); - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/Plan.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/Plan.java b/src/main/java/org/apache/sysml/hops/globalopt/Plan.java deleted file mode 100644 index d62c0fc..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/Plan.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import java.util.ArrayList; - -import org.apache.sysml.hops.DataOp; -import org.apache.sysml.hops.FunctionOp; -import org.apache.sysml.hops.Hop.DataOpTypes; -import org.apache.sysml.hops.OptimizerUtils; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode.NodeType; -import org.apache.sysml.lops.LopProperties.ExecType; -import org.apache.sysml.parser.Expression.DataType; -import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence; - - -public class Plan -{ - private static IDSequence _seqID = new IDSequence(); - - private long _ID = -1; - private GDFNode _node = null; - - private InterestingProperties _ips = null; - private RewriteConfig _conf = null; - private ArrayList<Plan> _childs = null; - private double _costs = -1; - - public Plan(GDFNode node, InterestingProperties ips, RewriteConfig rc, ArrayList<Plan> childs) - { - _ID = _seqID.getNextID(); - _node = node; - _ips = ips; - _conf = rc; - if( childs != null && !childs.isEmpty() ) - _childs = childs; - else - _childs = new ArrayList<>(); - } - - public Plan( Plan p ) - { - _ID = _seqID.getNextID(); - _node = p._node; - _ips = new InterestingProperties(p._ips); - _conf = new RewriteConfig(p._conf); - _costs = p._costs; - - if( p._childs != null && !p._childs.isEmpty() ) - _childs = new ArrayList<>(p._childs); - else - _childs = new ArrayList<>(); - } - - public GDFNode getNode() - { - return _node; - } - - public void addChild( Plan c ) - { - _childs.add(c); - } - - public ArrayList<Plan> getChilds() - { - return _childs; - } - - public InterestingProperties getInterestingProperties() - { - return _ips; - } - - public RewriteConfig getRewriteConfig() - { - return _conf; - } - - public void setCosts( double costs ) - { - _costs = costs; - } - - public double getCosts() - { - return _costs; - } - - /** - * If operation is executed in MR, all input blocksizes need to match. - * Note that the output blocksize can be different since we would add - * additional reblocks after that operation. - * - * @return true if valid blocksizes in MR - */ - public boolean checkValidBlocksizesInMR() - { - boolean ret = true; - ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; - - if( _conf.getExecType()==CLUSTER - && _childs != null && _childs.size() > 1 ) - { - int size0 = _childs.get(0)._conf.getBlockSize(); - if( size0 > 0 ) { //-1 compatible with everything - for( Plan c : _childs ) - ret &= ( c._conf.getBlockSize() == size0 - ||c._conf.getBlockSize() <= 0 ); - } - } - - return ret; - } - - public boolean checkValidBlocksizesTRead() - { - boolean ret = true; - - if( _node.getNodeType() == NodeType.HOP_NODE - && _node.getHop() instanceof DataOp - && ((DataOp)_node.getHop()).getDataOpType() == DataOpTypes.TRANSIENTREAD ) - { - for( Plan c : _childs ) - ret &= ( _conf.getBlockSize() == c._conf.getBlockSize() ); - } - - if( _node.getNodeType() == NodeType.CROSS_BLOCK_NODE ) - { - for( Plan c : _childs ) - ret &= ( _conf.getBlockSize() == c._conf.getBlockSize() ); - } - - return ret; - } - - /** - * If operation is executed in MR, only certain operations allow - * all formats. In general, unary operations also allow for cell inputs. - * TODO: check and test current format assumptions - * - * @return true if valid format in MR - */ - public boolean checkValidFormatInMR() - { - boolean ret = true; - ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; - - if( _conf.getExecType()==CLUSTER ) - { - if( _childs != null ) - for( Plan c : _childs ) - ret &= _node.isValidInputFormatForOperation(c._conf.getFormat()); - } - - return ret; - } - - public boolean checkValidExecutionType() - { - boolean ret = true; - - ret &= !( _node.getHop() instanceof FunctionOp && _conf.getExecType()!=ExecType.CP ); - //unnecessary, because reblock now merged into base hop - //ret &= !( _node.getHop() instanceof ReblockOp && _conf.getExecType()!=ExecType.MR ); - - return ret; - } - - /** - * A plan is defined as preferred if its output interesting properties - * match the interesting properties of all its matrix inputs. - * - * @return true if preferred plan - */ - public boolean isPreferredPlan() - { - boolean ret = true; - - for( Plan c : _childs ) - if( c.getNode().getDataType()==DataType.MATRIX ) - ret &= _ips.equals( c.getInterestingProperties() ); - - return ret; - } - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append("PLAN("+_ID+") ["); - sb.append(_ips.toString()); - sb.append(","); - sb.append(_conf.toString()); - sb.append(",{"); - for( Plan c : _childs ){ - sb.append(c._ID); - sb.append(","); - } - sb.setLength(sb.length()-1); - sb.append("},"); - sb.append(_costs); - sb.append("]"); - - return sb.toString(); - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/PlanSet.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/PlanSet.java b/src/main/java/org/apache/sysml/hops/globalopt/PlanSet.java deleted file mode 100644 index 41ee5f9..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/PlanSet.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import java.util.ArrayList; - -import org.apache.sysml.hops.globalopt.gdfgraph.GDFCrossBlockNode; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode; -import org.apache.sysml.hops.globalopt.gdfgraph.GDFNode.NodeType; - -public class PlanSet -{ - private ArrayList<Plan> _plans = null; - - public PlanSet() { - _plans = new ArrayList<>(); - } - - public PlanSet(ArrayList<Plan> plans) - { - _plans = plans; - } - - public ArrayList<Plan> getPlans() - { - return _plans; - } - - public void setPlans(ArrayList<Plan> plans) - { - _plans = plans; - } - - public int size() - { - if( _plans == null ) - return 0; - - return _plans.size(); - } - - public boolean isEmpty() - { - if( _plans == null ) - return true; - - return _plans.isEmpty(); - } - - public PlanSet crossProductChild(PlanSet pc) - { - //check for empty child plan set (return current plan set) - if( pc==null || pc.isEmpty() ) { - return this; - } - //check for empty parent plan set (pass-through child) - //(e.g., for crossblockop; this also implicitly reused costed runtime plans) - if( _plans == null || _plans.isEmpty() ) { - return pc; - } - - ArrayList<Plan> Pnew = new ArrayList<>(); - - // create cross product of plansets between partial and child plans - for( Plan p : _plans ) - for( Plan c : pc.getPlans() ) - { - Plan pnew = new Plan(p); - pnew.addChild( c ); - Pnew.add( pnew ); - } - - return new PlanSet(Pnew); - } - - public PlanSet selectChild( GDFNode node ) - { - String varname = (node.getNodeType()==NodeType.HOP_NODE) ? node.getHop().getName() : - ((GDFCrossBlockNode)node).getName(); - - ArrayList<Plan> Pnew = new ArrayList<>(); - for( Plan p : _plans ) - if( p.getNode().getHop()!=null - &&p.getNode().getHop().getName().equals(varname) ) - { - Pnew.add( p ); - } - - return new PlanSet(Pnew); - } - - public PlanSet union( PlanSet ps ) - { - ArrayList<Plan> Pnew = new ArrayList<>(_plans); - for( Plan p : ps._plans ) - Pnew.add( p ); - - return new PlanSet(Pnew); - } - - public Plan getPlanWithMinCosts() - { - //init global optimal plan and costs - double optCosts = Double.MAX_VALUE; - Plan optPlan = null; - - //compare costs of all plans - for( Plan p : _plans ) { - if( p.getCosts() < optCosts ) { - optCosts = p.getCosts(); - optPlan = p; - } - } - - return optPlan; - } - - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder(); - sb.append("PLAN_SET@"+super.hashCode()+":\n"); - for( Plan p : _plans ) { - sb.append("--"); - sb.append( p.toString() ); - sb.append("\n"); - } - - return sb.toString(); - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/RewriteConfig.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/RewriteConfig.java b/src/main/java/org/apache/sysml/hops/globalopt/RewriteConfig.java deleted file mode 100644 index 5372451..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/RewriteConfig.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -import org.apache.sysml.hops.Hop.FileFormatTypes; -import org.apache.sysml.hops.globalopt.InterestingProperties.Location; -import org.apache.sysml.lops.LopProperties.ExecType; - - - -/** - * This RewriteConfig represents an instance configuration of a particular rewrite. - * - */ -public class RewriteConfig -{ - - /* - public enum RewriteConfigType { - BLOCK_SIZE, - FORMAT_CHANGE, - EXEC_TYPE, - DATA_PARTITIONING, - VECTORIZATION, - REPLICATION_FACTOR - } - */ - - private ExecType _rewriteSetExecType = null; - private int _rewriteSetBlockSize = -1; - private FileFormatTypes _rewriteFormat = null; - - public RewriteConfig( ExecType et, int bs, FileFormatTypes format ) - { - _rewriteSetExecType = et; - _rewriteSetBlockSize = bs; - _rewriteFormat = format; - } - - public RewriteConfig( RewriteConfig rc ) - { - _rewriteSetExecType = rc._rewriteSetExecType; - _rewriteSetBlockSize = rc._rewriteSetBlockSize; - _rewriteFormat = rc._rewriteFormat; - } - - public ExecType getExecType() - { - return _rewriteSetExecType; - } - - public int getBlockSize() - { - return _rewriteSetBlockSize; - } - - public FileFormatTypes getFormat() - { - return _rewriteFormat; - } - - public InterestingProperties deriveInterestingProperties() - { - int bs = _rewriteSetBlockSize; - Location loc = (_rewriteSetExecType==ExecType.CP) ? Location.MEM : Location.HDFS; - - return new InterestingProperties(bs, null, loc, null, -1, true); - } - - - @Override - public String toString() - { - return "RC["+_rewriteSetExecType+","+_rewriteSetBlockSize+"]";//+_type+"="+_value + "]"; - } - -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/Summary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/Summary.java b/src/main/java/org/apache/sysml/hops/globalopt/Summary.java deleted file mode 100644 index 0ae93d3..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/Summary.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt; - -public class Summary -{ - - private double _costsInitial = -1; - private double _costsOptimal = -1; - private long _numEnumPlans = -1; - private long _numPrunedInvalidPlans = -1; - private long _numPrunedSuboptPlans = -1; - private long _numCompiledPlans = -1; - private long _numCostedPlans = -1; - private long _numEnumPlanMismatch = -1; - private long _numFinalPlanMismatch = -1; - private double _timeGDFGraph = -1; - private double _timeOptim = -1; - - public void setCostsInitial(double costsInitial) { - _costsInitial = costsInitial; - } - - public void setCostsOptimal(double costsOptimal) { - _costsOptimal = costsOptimal; - } - - public void setNumEnumPlans(long numEnumPlans) { - _numEnumPlans = numEnumPlans; - } - - public void setNumPrunedInvalidPlans(long numPrunedInvalidPlans) { - _numPrunedInvalidPlans = numPrunedInvalidPlans; - } - - public void setNumPrunedSuboptPlans(long numPrunedSuboptPlans) { - _numPrunedSuboptPlans = numPrunedSuboptPlans; - } - - public void setNumCompiledPlans(long numCompiledPlans) { - _numCompiledPlans = numCompiledPlans; - } - - public void setNumCostedPlans(long numCostedPlans) { - _numCostedPlans = numCostedPlans; - } - - public void setNumEnumPlanMismatch(long numEnumPlanMismatch) { - _numEnumPlanMismatch = numEnumPlanMismatch; - } - - public void setNumFinalPlanMismatch(long numFinalPlanMismatch) { - _numFinalPlanMismatch = numFinalPlanMismatch; - } - - public void setTimeGDFGraph(double timeGDFGraph) { - _timeGDFGraph = timeGDFGraph; - } - - public void setTimeOptim(double timeOptim) { - _timeOptim = timeOptim; - } - - @Override - public String toString() - { - StringBuilder sb = new StringBuilder(); - - sb.append("\nGlobal Optimization Summary:\n"); - sb.append("-- costs of initial plan: "+_costsInitial +"\n"); - sb.append("-- costs of optimal plan: "+_costsOptimal +"\n"); - sb.append("-- # enumerated plans: "+_numEnumPlans +"\n"); - sb.append("-- # pruned invalid plans: "+_numPrunedInvalidPlans +"\n"); - sb.append("-- # pruned subopt plans: "+_numPrunedSuboptPlans +"\n"); - sb.append("-- # program compilations: "+_numCompiledPlans +"\n"); - sb.append("-- # program costings: "+_numCostedPlans +"\n"); - sb.append("-- # enum plan mismatch: "+_numEnumPlanMismatch +"\n"); - sb.append("-- # final plan mismatch: "+_numFinalPlanMismatch +"\n"); - sb.append("-- graph creation time: "+String.format("%.3f", _timeGDFGraph/1000)+" sec.\n"); - sb.append("-- optimization time: "+String.format("%.3f", _timeOptim/1000)+" sec."); - - return sb.toString(); - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFCrossBlockNode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFCrossBlockNode.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFCrossBlockNode.java deleted file mode 100644 index 52d988d..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFCrossBlockNode.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfgraph; - -import java.util.ArrayList; - -import org.apache.sysml.hops.Hop; -import org.apache.sysml.runtime.controlprogram.ProgramBlock; - -/** - * Crossblock operators represent - * - */ -public class GDFCrossBlockNode extends GDFNode -{ - - - public enum CrossBlockNodeType { - PLAIN, - MERGE, - } - - private CrossBlockNodeType _cbtype = null; - private String _name = null; - - /** - * Constructor PLAIN crossblocknode - * - * @param hop high-level operator - * @param pb program block - * @param input GDF node - * @param name the name - */ - public GDFCrossBlockNode( Hop hop, ProgramBlock pb, GDFNode input, String name ) - { - super(hop, pb, null); - _type = NodeType.CROSS_BLOCK_NODE; - _inputs = new ArrayList<>(); - _inputs.add( input ); - - _cbtype = CrossBlockNodeType.PLAIN; - _name = name; - } - - /** - * Constructor MERGE crossblocknode - * - * @param hop high-level operator - * @param pb program block - * @param input1 GDF node 1 - * @param input2 GDF node 2 - * @param name the name - */ - public GDFCrossBlockNode( Hop hop, ProgramBlock pb, GDFNode input1, GDFNode input2, String name ) - { - super(hop, pb, null); - _type = NodeType.CROSS_BLOCK_NODE; - _inputs = new ArrayList<>(); - _inputs.add( input1 ); - _inputs.add( input2 ); - - _cbtype = CrossBlockNodeType.MERGE; - _name = name; - } - - public String getName() - { - return _name; - } - - @Override - public String explain(String deps) - { - String ldeps = (deps!=null) ? deps : ""; - - return "CBNode "+ldeps+" ["+_name+", "+_cbtype.toString().toLowerCase()+"]"; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFGraph.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFGraph.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFGraph.java deleted file mode 100644 index 669f982..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFGraph.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfgraph; - -import java.util.ArrayList; - -import org.apache.sysml.runtime.controlprogram.Program; - -public class GDFGraph -{ - - - private ArrayList<GDFNode> _roots = null; - private Program _rtprog = null; - - - public GDFGraph( Program prog, ArrayList<GDFNode> roots ) - { - _rtprog = prog; - _roots = roots; - } - - public ArrayList<GDFNode> getGraphRootNodes() - { - return _roots; - } - - public Program getRuntimeProgram() - { - return _rtprog; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFLoopNode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFLoopNode.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFLoopNode.java deleted file mode 100644 index 010ca1d..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFLoopNode.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfgraph; - -import java.util.ArrayList; -import java.util.HashMap; - -import org.apache.sysml.runtime.controlprogram.ProgramBlock; - -public class GDFLoopNode extends GDFNode -{ - - private GDFNode _predicate = null; - private HashMap<String,GDFNode> _linputs = null; //all read variables - private HashMap<String,GDFNode> _loutputs = null; //all updated variables, not necessarily liveout - - public GDFLoopNode( ProgramBlock pb, GDFNode predicate, HashMap<String, GDFNode> inputs, HashMap<String,GDFNode> outputs ) - { - super(null, pb, new ArrayList<>(inputs.values())); - _type = NodeType.LOOP_NODE; - _predicate = predicate; - _linputs = inputs; - _loutputs = outputs; - } - - public HashMap<String, GDFNode> getLoopInputs() - { - return _linputs; - } - - public HashMap<String, GDFNode> getLoopOutputs() - { - return _loutputs; - } - - public GDFNode getLoopPredicate() - { - return _predicate; - } - - @Override - public String explain(String deps) - { - String ldeps = (deps!=null) ? deps : ""; - - //current node details - return "LoopNode "+ldeps+" ["+_linputs.keySet().toString()+","+_loutputs.keySet().toString()+"]"; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java deleted file mode 100644 index e385a86..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GDFNode.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfgraph; - -import java.util.ArrayList; - -import org.apache.sysml.hops.AggUnaryOp; -import org.apache.sysml.hops.DataGenOp; -import org.apache.sysml.hops.Hop; -import org.apache.sysml.hops.Hop.DataGenMethod; -import org.apache.sysml.hops.Hop.Direction; -import org.apache.sysml.hops.Hop.FileFormatTypes; -import org.apache.sysml.hops.Hop.OpOp1; -import org.apache.sysml.hops.UnaryOp; -import org.apache.sysml.hops.rewrite.HopRewriteUtils; -import org.apache.sysml.parser.Expression.DataType; -import org.apache.sysml.runtime.controlprogram.Program; -import org.apache.sysml.runtime.controlprogram.ProgramBlock; -import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence; - -/** - * The reason of a custom graph structure is to unify both within DAG - * and cross DAG enumeration. Conceptually, we would only need interesting - * properties of transient reads and could compile locally. - * - * Furthermore, having a global graph structure also allows for more advanced - * algebraic simplification rewrites because the semantics of transient read - * inputs are always available. - * - */ -public class GDFNode -{ - - public enum NodeType{ - HOP_NODE, - LOOP_NODE, - CROSS_BLOCK_NODE, - } - - private static IDSequence _seqID = new IDSequence(); - - protected NodeType _type = null; - protected long _ID = -1; - - //references to original program and hop dag - protected Hop _hop = null; - protected ProgramBlock _pb = null; - - //input nodes - protected ArrayList<GDFNode> _inputs = null; - - public GDFNode() - { - _ID = _seqID.getNextID(); - } - - public GDFNode( Hop hop, ProgramBlock pb, ArrayList<GDFNode> inputs ) - { - this(); - _type = NodeType.HOP_NODE; - _hop = hop; - _pb = pb; - _inputs = inputs; - } - - public NodeType getNodeType() - { - return _type; - } - - public long getID() - { - return _ID; - } - - public Hop getHop() - { - return _hop; - } - - public ProgramBlock getProgramBlock() - { - return _pb; - } - - public Program getProgram() - { - if( _pb != null ) - return _pb.getProgram(); - return null; - } - - public ArrayList<GDFNode> getInputs() - { - return _inputs; - } - - public DataType getDataType() - { - return _hop.getDataType(); - } - - /** - * If the output or any input is a matrix we need to consider - * MR configurations. This for examples excludes Literals or - * any purely scalar operation. - * - * @return true if requires MR enumeration - */ - public boolean requiresMREnumeration() - { - //general rule: MR generation required if at least one matrix input/output - boolean ret = (_hop.getDataType() == DataType.MATRIX); - for( Hop c : _hop.getInput() ) - ret |= (c.getDataType() == DataType.MATRIX); - - //special cases of CP-only operators - if( _hop instanceof UnaryOp && ((UnaryOp)_hop).getOp()==OpOp1.CAST_AS_SCALAR ) //as.scalar - ret = false; - if( _hop instanceof DataGenOp && ((DataGenOp)_hop).getOp()==DataGenMethod.SINIT ) //matrix(str, ) - ret = false; - if( _hop instanceof UnaryOp && ((UnaryOp)_hop).getOp()==OpOp1.NROW ) //nrow - meta data only - ret = false; - if( _hop instanceof UnaryOp && ((UnaryOp)_hop).getOp()==OpOp1.NCOL ) //ncol - meta data only - ret = false; - - return ret; - } - - public boolean isValidInputFormatForOperation( FileFormatTypes format ) - { - return ( _hop instanceof UnaryOp && format!=FileFormatTypes.CSV - || (_hop instanceof AggUnaryOp && ((AggUnaryOp)_hop).getDirection()==Direction.RowCol && format!=FileFormatTypes.CSV) - || (HopRewriteUtils.isTransposeOperation(_hop) && format!=FileFormatTypes.CSV) - || format==FileFormatTypes.BINARY ); //any op - } - - public String explain(String deps) - { - String ldeps = (deps!=null) ? deps : ""; - - //node details - if( _hop!=null ) - return "Node "+ldeps+" ["+_hop.getHopID()+", "+_hop.getOpString()+"]"; - else - return "Node "+ldeps+" [null]"; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GraphBuilder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GraphBuilder.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GraphBuilder.java deleted file mode 100644 index 923807e..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfgraph/GraphBuilder.java +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfgraph; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.sysml.hops.DataOp; -import org.apache.sysml.hops.Hop; -import org.apache.sysml.hops.Hop.DataOpTypes; -import org.apache.sysml.hops.globalopt.Summary; -import org.apache.sysml.hops.HopsException; -import org.apache.sysml.parser.ForStatementBlock; -import org.apache.sysml.parser.IfStatementBlock; -import org.apache.sysml.parser.StatementBlock; -import org.apache.sysml.parser.WhileStatementBlock; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.controlprogram.ForProgramBlock; -import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock; -import org.apache.sysml.runtime.controlprogram.IfProgramBlock; -import org.apache.sysml.runtime.controlprogram.Program; -import org.apache.sysml.runtime.controlprogram.ProgramBlock; -import org.apache.sysml.runtime.controlprogram.WhileProgramBlock; -import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; -import org.apache.sysml.utils.Explain; - -/** - * GENERAL 'GDF GRAPH' STRUCTURE, by MB: - * 1) Each hop is represented by an GDFNode - * 2) Each loop is represented by a structured GDFLoopNode - * 3) Transient Read/Write connections are represented via CrossBlockNodes, - * a) type PLAIN: single input crossblocknode represents unconditional data flow - * b) type MERGE: two inputs crossblocknode represent conditional data flow merge - * - * In detail, the graph builder essentially does a single pass over the entire program - * and constructs the global data flow graph bottom up. We create crossblocknodes for - * every transient write, loop nodes for for/while programblocks, and crossblocknodes - * after every if programblock. - * - */ -public class GraphBuilder -{ - - private static final boolean IGNORE_UNBOUND_UPDATED_VARS = true; - - public static GDFGraph constructGlobalDataFlowGraph( Program prog, Summary summary ) - throws DMLRuntimeException, HopsException - { - Timing time = new Timing(true); - - HashMap<String, GDFNode> roots = new HashMap<>(); - for( ProgramBlock pb : prog.getProgramBlocks() ) - constructGDFGraph( pb, roots ); - - //create GDF graph root nodes - ArrayList<GDFNode> ret = new ArrayList<>(); - for( GDFNode root : roots.values() ) - if( !(root instanceof GDFCrossBlockNode) ) - ret.add(root); - - //create GDF graph - GDFGraph graph = new GDFGraph(prog, ret); - - summary.setTimeGDFGraph(time.stop()); - return graph; - } - - @SuppressWarnings("unchecked") - private static void constructGDFGraph( ProgramBlock pb, HashMap<String, GDFNode> roots ) - throws DMLRuntimeException, HopsException - { - if (pb instanceof FunctionProgramBlock ) - { - throw new DMLRuntimeException("FunctionProgramBlocks not implemented yet."); - } - else if (pb instanceof WhileProgramBlock) - { - WhileProgramBlock wpb = (WhileProgramBlock) pb; - WhileStatementBlock wsb = (WhileStatementBlock) pb.getStatementBlock(); - //construct predicate node (conceptually sequence of from/to/incr) - GDFNode pred = constructGDFGraph(wsb.getPredicateHops(), wpb, new HashMap<Long, GDFNode>(), roots); - HashMap<String,GDFNode> inputs = constructLoopInputNodes(wpb, wsb, roots); - HashMap<String,GDFNode> lroots = (HashMap<String, GDFNode>) inputs.clone(); - //process childs blocks - for( ProgramBlock pbc : wpb.getChildBlocks() ) - constructGDFGraph(pbc, lroots); - HashMap<String,GDFNode> outputs = constructLoopOutputNodes(wsb, lroots); - GDFLoopNode lnode = new GDFLoopNode(wpb, pred, inputs, outputs ); - //construct crossblock nodes - constructLoopOutputCrossBlockNodes(wsb, lnode, outputs, roots, wpb); - } - else if (pb instanceof IfProgramBlock) - { - IfProgramBlock ipb = (IfProgramBlock) pb; - IfStatementBlock isb = (IfStatementBlock) pb.getStatementBlock(); - //construct predicate - if( isb.getPredicateHops()!=null ) { - Hop pred = isb.getPredicateHops(); - roots.put(pred.getName(), constructGDFGraph(pred, ipb, new HashMap<Long,GDFNode>(), roots)); - } - //construct if and else branch separately - HashMap<String,GDFNode> ifRoots = (HashMap<String, GDFNode>) roots.clone(); - HashMap<String,GDFNode> elseRoots = (HashMap<String, GDFNode>) roots.clone(); - for( ProgramBlock pbc : ipb.getChildBlocksIfBody() ) - constructGDFGraph(pbc, ifRoots); - if( ipb.getChildBlocksElseBody()!=null ) - for( ProgramBlock pbc : ipb.getChildBlocksElseBody() ) - constructGDFGraph(pbc, elseRoots); - //merge data flow roots (if no else, elseRoots refer to original roots) - reconcileMergeIfProgramBlockOutputs(ifRoots, elseRoots, roots, ipb); - } - else if (pb instanceof ForProgramBlock) //incl parfor - { - ForProgramBlock fpb = (ForProgramBlock) pb; - ForStatementBlock fsb = (ForStatementBlock)pb.getStatementBlock(); - //construct predicate node (conceptually sequence of from/to/incr) - GDFNode pred = constructForPredicateNode(fpb, fsb, roots); - HashMap<String,GDFNode> inputs = constructLoopInputNodes(fpb, fsb, roots); - HashMap<String,GDFNode> lroots = (HashMap<String, GDFNode>) inputs.clone(); - //process childs blocks - for( ProgramBlock pbc : fpb.getChildBlocks() ) - constructGDFGraph(pbc, lroots); - HashMap<String,GDFNode> outputs = constructLoopOutputNodes(fsb, lroots); - GDFLoopNode lnode = new GDFLoopNode(fpb, pred, inputs, outputs ); - //construct crossblock nodes - constructLoopOutputCrossBlockNodes(fsb, lnode, outputs, roots, fpb); - } - else //last-level program block - { - StatementBlock sb = pb.getStatementBlock(); - ArrayList<Hop> hops = sb.getHops(); - if( hops != null ) - { - //create new local memo structure for local dag - HashMap<Long, GDFNode> lmemo = new HashMap<>(); - for( Hop hop : hops ) - { - //recursively construct GDF graph for hop dag root - GDFNode root = constructGDFGraph(hop, pb, lmemo, roots); - if( root == null ) - throw new HopsException( "GDFGraphBuilder: failed to constuct dag root for: "+Explain.explain(hop) ); - - //create cross block nodes for all transient writes - if( hop instanceof DataOp && ((DataOp)hop).getDataOpType()==DataOpTypes.TRANSIENTWRITE ) - root = new GDFCrossBlockNode(hop, pb, root, hop.getName()); - - //add GDF root node to global roots - roots.put(hop.getName(), root); - } - } - - } - } - - private static GDFNode constructGDFGraph( Hop hop, ProgramBlock pb, HashMap<Long, GDFNode> lmemo, HashMap<String, GDFNode> roots ) - { - if( lmemo.containsKey(hop.getHopID()) ) - return lmemo.get(hop.getHopID()); - - //process childs recursively first - ArrayList<GDFNode> inputs = new ArrayList<>(); - for( Hop c : hop.getInput() ) - inputs.add( constructGDFGraph(c, pb, lmemo, roots) ); - - //connect transient reads to existing roots of data flow graph - if( hop instanceof DataOp && ((DataOp)hop).getDataOpType()==DataOpTypes.TRANSIENTREAD ){ - inputs.add(roots.get(hop.getName())); - } - - //add current hop - GDFNode gnode = new GDFNode(hop, pb, inputs); - - //add GDF node of updated variables to global roots (necessary for loops, where updated local - //variables might never be bound to their logical variables names - if( !IGNORE_UNBOUND_UPDATED_VARS ) { - //NOTE: currently disabled because unnecessary, if no transientwrite by definition included in other transientwrite - if( pb.getStatementBlock()!=null && pb.getStatementBlock().variablesUpdated().containsVariable(hop.getName()) ) { - roots.put(hop.getName(), gnode); - } - } - - //memoize current node - lmemo.put(hop.getHopID(), gnode); - - return gnode; - } - - private static GDFNode constructForPredicateNode(ForProgramBlock fpb, ForStatementBlock fsb, HashMap<String, GDFNode> roots) - { - HashMap<Long, GDFNode> memo = new HashMap<>(); - GDFNode from = (fsb.getFromHops()!=null)? constructGDFGraph(fsb.getFromHops(), fpb, memo, roots) : null; - GDFNode to = (fsb.getToHops()!=null)? constructGDFGraph(fsb.getToHops(), fpb, memo, roots) : null; - GDFNode incr = (fsb.getIncrementHops()!=null)? constructGDFGraph(fsb.getIncrementHops(), fpb, memo, roots) : null; - ArrayList<GDFNode> inputs = new ArrayList<>(); - inputs.add(from); - inputs.add(to); - inputs.add(incr); - //TODO for predicates - GDFNode pred = new GDFNode(null, fpb, inputs ); - - return pred; - } - - private static HashMap<String, GDFNode> constructLoopInputNodes( ProgramBlock fpb, StatementBlock fsb, HashMap<String, GDFNode> roots ) - throws DMLRuntimeException - { - HashMap<String, GDFNode> ret = new HashMap<>(); - Set<String> invars = fsb.variablesRead().getVariableNames(); - for( String var : invars ) { - if( fsb.liveIn().containsVariable(var) ) { - GDFNode node = roots.get(var); - if( node == null ) - throw new DMLRuntimeException("GDFGraphBuilder: Non-existing input node for variable: "+var); - ret.put(var, node); - } - } - - return ret; - } - - private static HashMap<String, GDFNode> constructLoopOutputNodes( StatementBlock fsb, HashMap<String, GDFNode> roots ) - throws HopsException - { - HashMap<String, GDFNode> ret = new HashMap<>(); - - Set<String> outvars = fsb.variablesUpdated().getVariableNames(); - for( String var : outvars ) - { - GDFNode node = roots.get(var); - - //handle non-existing nodes - if( node == null ) { - if( !IGNORE_UNBOUND_UPDATED_VARS ) - throw new HopsException( "GDFGraphBuilder: failed to constuct loop output for variable: "+var ); - else - continue; //skip unbound updated variables - } - - //add existing node to loop outputs - ret.put(var, node); - } - - return ret; - } - - private static void reconcileMergeIfProgramBlockOutputs( HashMap<String, GDFNode> ifRoots, HashMap<String, GDFNode> elseRoots, HashMap<String, GDFNode> roots, IfProgramBlock pb ) - { - //merge same variable names, different data - //( incl add new vars from if branch if node2==null) - for( Entry<String, GDFNode> e : ifRoots.entrySet() ){ - GDFNode node1 = e.getValue(); - GDFNode node2 = elseRoots.get(e.getKey()); //original or new - if( node1 != node2 ) - node1 = new GDFCrossBlockNode(null, pb, node1, node2, e.getKey() ); - roots.put(e.getKey(), node1); - } - - //add new vars from else branch - for( Entry<String, GDFNode> e : elseRoots.entrySet() ){ - if( !ifRoots.containsKey(e.getKey()) ) - roots.put(e.getKey(), e.getValue()); - } - } - - private static void constructLoopOutputCrossBlockNodes(StatementBlock sb, GDFLoopNode loop, HashMap<String, GDFNode> loutputs, HashMap<String, GDFNode> roots, ProgramBlock pb) - { - //iterate over all output (updated) variables - for( Entry<String,GDFNode> e : loutputs.entrySet() ) - { - //create crossblocknode, if updated variable is also in liveout - if( sb.liveOut().containsVariable(e.getKey()) ) { - GDFCrossBlockNode node = null; - if( roots.containsKey(e.getKey()) ) - node = new GDFCrossBlockNode(null, pb, roots.get(e.getKey()), loop, e.getKey()); //MERGE - else - node = new GDFCrossBlockNode(null, pb, loop, e.getKey()); //PLAIN - roots.put(e.getKey(), node); - } - } - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/b3419b09/src/main/java/org/apache/sysml/hops/globalopt/gdfresolve/GDFMismatchHeuristic.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/globalopt/gdfresolve/GDFMismatchHeuristic.java b/src/main/java/org/apache/sysml/hops/globalopt/gdfresolve/GDFMismatchHeuristic.java deleted file mode 100644 index c2a44b1..0000000 --- a/src/main/java/org/apache/sysml/hops/globalopt/gdfresolve/GDFMismatchHeuristic.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops.globalopt.gdfresolve; - -import org.apache.sysml.hops.globalopt.RewriteConfig; - -public abstract class GDFMismatchHeuristic -{ - - public enum MismatchHeuristicType { - FIRST, - BLOCKSIZE_OR_FIRST, - } - - /** - * Returns the name of the implementing mismatch heuristic. - * - * @return the name of the implementing mismatch heuristic - */ - public abstract String getName(); - - /** - * Resolve the mismatch of two given rewrite configurations. This call returns true, - * if and only if the new configuration is chosen. - * - * @param currRc the current rewrite config - * @param newRc the new rewrite config - * @return true if and only if the new configuration is chosen - */ - public abstract boolean resolveMismatch( RewriteConfig currRc, RewriteConfig newRc ); -}