Repository: systemml Updated Branches: refs/heads/master ae9eadc85 -> 573943e0e
[SYSTEMML-2452] Fix codegen on parfor recompilation, uni/bivar stats Dynamic recompilation on parfor entry happens in an in-place manner. With codegen this is invalid in the presence of changing dimensions (e.g., stepwise linear regression). Hence, SYSTEMML-2072 introduced dedicated handling for codegen on in-place recompilation. However, this led to a performance regression on univariate/bivariate stats with codegen because the deep copy for codegen happened after clear lops which corrupted the original DAG without proper recompilation (that happened on the deep copy). This led to operators being interpreted as CP and thus to inconsistencies between hops and instructions in case of large data (i.e., with spark instructions). All this eventually resulted in a local plan and thus a performance regression. With this patch we find the right plan again Furthermore, this also includes a HOTFIX for the previous parfor change by avoiding the size-based guard in the constrained optimizer as used in the tests to force data partitioning. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/573943e0 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/573943e0 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/573943e0 Branch: refs/heads/master Commit: 573943e0e5aaa8fde5e2e25878187d47e5d2403d Parents: ae9eadc Author: Matthias Boehm <[email protected]> Authored: Tue Jul 17 23:12:30 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Tue Jul 17 23:12:30 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/recompile/Recompiler.java | 10 ++++++---- .../controlprogram/parfor/opt/OptimizerConstrained.java | 10 +++++----- .../controlprogram/parfor/opt/OptimizerRuleBased.java | 10 +++++----- 3 files changed, 16 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java index 4175eac..21923de 100644 --- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java +++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java @@ -295,12 +295,16 @@ public class Recompiler private static ArrayList<Instruction> recompile(StatementBlock sb, ArrayList<Hop> hops, LocalVariableMap vars, RecompileStatus status, boolean inplace, boolean replaceLit, boolean updateStats, boolean forceEt, boolean pred, ExecType et, long tid ) { + boolean codegen = ConfigurationManager.isCodegenEnabled() + && !(forceEt && et == null ) //not on reset + && SpoofCompiler.RECOMPILE_CODEGEN; + // prepare hops dag for recompile if( !inplace ){ // deep copy hop dag (for non-reversable rewrites) hops = deepCopyHopsDag(hops); } - else { + else if( !codegen ) { // clear existing lops Hop.resetVisitStatus(hops); for( Hop hopRoot : hops ) @@ -356,9 +360,7 @@ public class Recompiler } // codegen if enabled - if( ConfigurationManager.isCodegenEnabled() - && !(forceEt && et == null ) //not on reset - && SpoofCompiler.RECOMPILE_CODEGEN ) { + if( codegen ) { //create deep copy for in-place if( inplace ) hops = deepCopyHopsDag(hops); http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java index 3864004..8f67a3e 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java @@ -106,7 +106,7 @@ public class OptimizerConstrained extends OptimizerRuleBased // rewrite 1: data partitioning (incl. log. recompile RIX) HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>(); - rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget() ); + rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget(), true); double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate // rewrite 2: remove unnecessary compare matrix @@ -132,8 +132,8 @@ public class OptimizerConstrained extends OptimizerRuleBased { if( M1 > _rm && M3 <= _rm ) { // rewrite 1: data partitioning (apply conditional partitioning) - rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3 ); - M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate + rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3, true ); + M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate } if( flagRecompMR ){ @@ -225,11 +225,11 @@ public class OptimizerConstrained extends OptimizerRuleBased /// @Override - protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String,PartitionFormat> partitionedMatrices, double thetaM) + protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String,PartitionFormat> partitionedMatrices, double thetaM, boolean constrained) { //call rewrite first to obtain partitioning information String initPlan = n.getParam(ParamType.DATA_PARTITIONER); - boolean blockwise = super.rewriteSetDataPartitioner(n, vars, partitionedMatrices, thetaM); + boolean blockwise = super.rewriteSetDataPartitioner(n, vars, partitionedMatrices, thetaM, constrained); // constraint awareness if( !initPlan.equals(PDataPartitioner.UNSPECIFIED.name()) ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java index 035b265..cbc8d8d 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java @@ -228,7 +228,7 @@ public class OptimizerRuleBased extends Optimizer // rewrite 1: data partitioning (incl. log. recompile RIX and flag opt nodes) HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>(); - rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget() ); + rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget(), false ); double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate // rewrite 2: remove unnecessary compare matrix (before result partitioning) @@ -254,8 +254,8 @@ public class OptimizerRuleBased extends Optimizer { if( M1 > _rm && M3 <= _rm ) { // rewrite 1: data partitioning (apply conditional partitioning) - rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3 ); - M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate + rewriteSetDataPartitioner( pn, ec.getVariables(), partitionedMatrices, M3, false ); + M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate } if( flagRecompMR ){ @@ -393,7 +393,7 @@ public class OptimizerRuleBased extends Optimizer //REWRITE set data partitioner /// - protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM ) + protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM, boolean constrained ) { if( n.getNodeType() != NodeType.PARFOR ) LOG.warn(getOptMode()+" OPT: Data partitioner can only be set for a ParFor node."); @@ -417,7 +417,7 @@ public class OptimizerRuleBased extends Optimizer double mem = getMemoryEstimate(c, vars); if( dpf != PartitionFormat.NONE && dpf._dpf != PDataPartitionFormat.BLOCK_WISE_M_N - && mem > _lm/2 && mem > _rm/2 ) { + && (constrained || (mem > _lm/2 && mem > _rm/2)) ) { cand2.put( c, dpf ); } }
