Repository: systemml
Updated Branches:
  refs/heads/master ae9eadc85 -> 573943e0e


[SYSTEMML-2452] Fix codegen on parfor recompilation, uni/bivar stats

Dynamic recompilation on parfor entry happens in an in-place manner.
With codegen this is invalid in the presence of changing dimensions
(e.g., stepwise linear regression). Hence, SYSTEMML-2072 introduced
dedicated handling for codegen on in-place recompilation. However, this
led to a performance regression on univariate/bivariate stats with
codegen because the deep copy for codegen happened after clear lops
which corrupted the original DAG without proper recompilation (that
happened on the deep copy). This led to operators being interpreted as
CP and thus to inconsistencies between hops and instructions in case of
large data (i.e., with spark instructions). All this eventually resulted
in a local plan and thus a performance regression. With this patch we
find the right plan again

Furthermore, this also includes a HOTFIX for the previous parfor change
by avoiding the size-based guard in the constrained optimizer as used in
the tests to force data partitioning.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/573943e0
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/573943e0
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/573943e0

Branch: refs/heads/master
Commit: 573943e0e5aaa8fde5e2e25878187d47e5d2403d
Parents: ae9eadc
Author: Matthias Boehm <[email protected]>
Authored: Tue Jul 17 23:12:30 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Tue Jul 17 23:12:30 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/recompile/Recompiler.java  | 10 ++++++----
 .../controlprogram/parfor/opt/OptimizerConstrained.java   | 10 +++++-----
 .../controlprogram/parfor/opt/OptimizerRuleBased.java     | 10 +++++-----
 3 files changed, 16 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java 
b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
index 4175eac..21923de 100644
--- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
@@ -295,12 +295,16 @@ public class Recompiler
        private static ArrayList<Instruction> recompile(StatementBlock sb, 
ArrayList<Hop> hops, LocalVariableMap vars, RecompileStatus status,
                boolean inplace, boolean replaceLit, boolean updateStats, 
boolean forceEt, boolean pred, ExecType et, long tid ) 
        {
+               boolean codegen = ConfigurationManager.isCodegenEnabled()
+                       && !(forceEt && et == null ) //not on reset
+                       && SpoofCompiler.RECOMPILE_CODEGEN;
+               
                // prepare hops dag for recompile
                if( !inplace ){ 
                        // deep copy hop dag (for non-reversable rewrites)
                        hops = deepCopyHopsDag(hops);
                }
-               else {
+               else if( !codegen ) {
                        // clear existing lops
                        Hop.resetVisitStatus(hops);
                        for( Hop hopRoot : hops )
@@ -356,9 +360,7 @@ public class Recompiler
                }
                
                // codegen if enabled
-               if( ConfigurationManager.isCodegenEnabled()
-                       && !(forceEt && et == null ) //not on reset
-                       && SpoofCompiler.RECOMPILE_CODEGEN ) {
+               if( codegen ) {
                        //create deep copy for in-place
                        if( inplace )
                                hops = deepCopyHopsDag(hops);

http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
index 3864004..8f67a3e 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
@@ -106,7 +106,7 @@ public class OptimizerConstrained extends OptimizerRuleBased
 
                // rewrite 1: data partitioning (incl. log. recompile RIX)
                HashMap<String, PartitionFormat> partitionedMatrices = new 
HashMap<>();
-               rewriteSetDataPartitioner( pn, ec.getVariables(), 
partitionedMatrices, OptimizerUtils.getLocalMemBudget() );
+               rewriteSetDataPartitioner(pn, ec.getVariables(), 
partitionedMatrices, OptimizerUtils.getLocalMemBudget(), true);
                double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); 
//reestimate
 
                // rewrite 2: remove unnecessary compare matrix
@@ -132,8 +132,8 @@ public class OptimizerConstrained extends OptimizerRuleBased
                {
                        if( M1 > _rm && M3 <= _rm  ) {
                                // rewrite 1: data partitioning (apply 
conditional partitioning)
-                               rewriteSetDataPartitioner( pn, 
ec.getVariables(), partitionedMatrices, M3 );
-                               M1 = 
_cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate              
+                               rewriteSetDataPartitioner( pn, 
ec.getVariables(), partitionedMatrices, M3, true );
+                               M1 = 
_cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate 
                        }
                        
                        if( flagRecompMR ){
@@ -225,11 +225,11 @@ public class OptimizerConstrained extends 
OptimizerRuleBased
        ///
 
        @Override
-       protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap 
vars, HashMap<String,PartitionFormat> partitionedMatrices, double thetaM)
+       protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap 
vars, HashMap<String,PartitionFormat> partitionedMatrices, double thetaM, 
boolean constrained)
        {
                //call rewrite first to obtain partitioning information
                String initPlan = n.getParam(ParamType.DATA_PARTITIONER);
-               boolean blockwise = super.rewriteSetDataPartitioner(n, vars, 
partitionedMatrices, thetaM);
+               boolean blockwise = super.rewriteSetDataPartitioner(n, vars, 
partitionedMatrices, thetaM, constrained);
                
                // constraint awareness
                if( !initPlan.equals(PDataPartitioner.UNSPECIFIED.name()) ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/573943e0/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index 035b265..cbc8d8d 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -228,7 +228,7 @@ public class OptimizerRuleBased extends Optimizer
                
                // rewrite 1: data partitioning (incl. log. recompile RIX and 
flag opt nodes)
                HashMap<String, PartitionFormat> partitionedMatrices = new 
HashMap<>();
-               rewriteSetDataPartitioner( pn, ec.getVariables(), 
partitionedMatrices, OptimizerUtils.getLocalMemBudget() );
+               rewriteSetDataPartitioner( pn, ec.getVariables(), 
partitionedMatrices, OptimizerUtils.getLocalMemBudget(), false );
                double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); 
//reestimate
                
                // rewrite 2: remove unnecessary compare matrix (before result 
partitioning)
@@ -254,8 +254,8 @@ public class OptimizerRuleBased extends Optimizer
                {
                        if( M1 > _rm && M3 <= _rm  ) {
                                // rewrite 1: data partitioning (apply 
conditional partitioning)
-                               rewriteSetDataPartitioner( pn, 
ec.getVariables(), partitionedMatrices, M3 );
-                               M1 = 
_cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate              
+                               rewriteSetDataPartitioner( pn, 
ec.getVariables(), partitionedMatrices, M3, false );
+                               M1 = 
_cost.getEstimate(TestMeasure.MEMORY_USAGE, pn); //reestimate
                        }
                        
                        if( flagRecompMR ){
@@ -393,7 +393,7 @@ public class OptimizerRuleBased extends Optimizer
        //REWRITE set data partitioner
        ///
 
-       protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap 
vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM ) 
+       protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap 
vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM, 
boolean constrained ) 
        {
                if( n.getNodeType() != NodeType.PARFOR )
                        LOG.warn(getOptMode()+" OPT: Data partitioner can only 
be set for a ParFor node.");
@@ -417,7 +417,7 @@ public class OptimizerRuleBased extends Optimizer
                                double mem = getMemoryEstimate(c, vars);
                                if( dpf != PartitionFormat.NONE 
                                        && dpf._dpf != 
PDataPartitionFormat.BLOCK_WISE_M_N
-                                       && mem > _lm/2 && mem > _rm/2 ) {
+                                       && (constrained || (mem > _lm/2 && mem 
> _rm/2)) ) {
                                        cand2.put( c, dpf );
                                }
                        }

Reply via email to