Repository: systemml
Updated Branches:
  refs/heads/master 0751ddc0f -> 3acd94186


[SYSTEMML-1904] Improved codegen dynamic recompilation decisions

This patch improves the decision on marking HOPs, and thus HOP DAGs, for
dynamic recompilation. For example, on AutoEncoder, the indexing of
batches causes unknowns in the main DAG of the inner loop. Since the
worst-case estimates are sufficient to compile these operations to CP,
this DAG is not marked for dynamic recompilation. Hence, the codegen
optimizer can never re-optimize this DAG again. We now use more
aggressive recompilation decisions for codegen, effectively marking
every HOP DAG with unknown dimensions for dynamic recompilation. 

On one epoch of AutoEncoder over Mnist60k, batch=512, H1=500, H2=2, this
change improved the end-to-end performance from 39s to 24s.

Furthermore, this also includes two minor improvements of codegen outer
templates: (1) improved performance of full aggregations due to fewer
array loads and stores, and (2) a fix of the fusion condition to avoid
transpose operations on the main input and intermediates.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/808a8f4f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/808a8f4f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/808a8f4f

Branch: refs/heads/master
Commit: 808a8f4f89a9586a9fd556170db723edd3be64f2
Parents: 0751ddc
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Tue Sep 12 00:15:26 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Tue Sep 12 20:13:17 2017 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/Hop.java                | 9 +++++++--
 .../sysml/hops/codegen/template/TemplateOuterProduct.java   | 3 ++-
 .../org/apache/sysml/runtime/codegen/SpoofOuterProduct.java | 6 ++++--
 3 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java 
b/src/main/java/org/apache/sysml/hops/Hop.java
index b454771..7495d4f 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -1523,21 +1523,26 @@ public abstract class Hop implements ParseInfo
        
        /**
         * Marks the hop for dynamic recompilation, if dynamic recompilation is 
-        * enabled and one of the two basic scenarios apply:
+        * enabled and one of the three basic scenarios apply:
         * <ul>
         *  <li> The hop has unknown dimensions or sparsity and is scheduled 
for 
         *    remote execution, in which case the latency for distributed jobs 
easily 
         *    covers any recompilation overheads. </li>
         *  <li> The hop has unknown dimensions and is scheduled for local 
execution 
         *    due to forced single node execution type. </li>
+        *  <li> The hop has unknown dimensions and is scheduled for local 
execution 
+        *    due to good worst-case memory estimates but codegen is enabled, 
which
+        *    requires (mostly) known sizes to validity conditions and cost 
estimation. </li>
         * <ul> <p>
         */
        protected void setRequiresRecompileIfNecessary() {
                ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? 
ExecType.SPARK : ExecType.MR;
                boolean caseRemote = (!dimsKnown(true) && _etype == REMOTE);
                boolean caseLocal = (!dimsKnown() && _etypeForced == 
ExecType.CP);
+               boolean caseCodegen = (!dimsKnown() && 
ConfigurationManager.isCodegenEnabled());
                
-               if( ConfigurationManager.isDynamicRecompilation() && 
(caseRemote || caseLocal) )
+               if( ConfigurationManager.isDynamicRecompilation() 
+                       && (caseRemote || caseLocal || caseCodegen) )
                        setRequiresRecompile();
        }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
index e4fb464..3756447 100644
--- 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
+++ 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
@@ -69,7 +69,8 @@ public class TemplateOuterProduct extends TemplateBase {
                        || (hop instanceof BinaryOp && 
TemplateUtils.isOperationSupported(hop)
                                && (TemplateUtils.isBinaryMatrixColVector(hop) 
|| HopRewriteUtils.isBinaryMatrixScalarOperation(hop)
                                || 
(HopRewriteUtils.isBinaryMatrixMatrixOperation(hop) && 
HopRewriteUtils.isBinary(hop, OpOp2.MULT, OpOp2.DIV)) )) 
-                       || (HopRewriteUtils.isTransposeOperation(hop) && 
!HopRewriteUtils.isOuterProductLikeMM(input)) 
+                       || (HopRewriteUtils.isTransposeOperation(hop) && input 
instanceof AggBinaryOp
+                               && 
!HopRewriteUtils.isOuterProductLikeMM(input)) 
                        || (hop instanceof AggBinaryOp && 
!HopRewriteUtils.isOuterProductLikeMM(hop)
                                && TemplateUtils.containsOuterProduct(input, 
HopRewriteUtils.getOtherInput(hop, input)))
                        || (hop instanceof AggUnaryOp && 
((AggUnaryOp)hop).getDirection()==Direction.RowCol));

http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index 1ec873f..c25a522 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -370,6 +370,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                
                final int blocksizeIJ = 16; //u/v block (max at typical L2 size)
                //blocked execution
+               double sum = 0;
                for( int bi = rl; bi < ru; bi+=blocksizeIJ )
                        for( int bj = cl, bimin = Math.min(ru, bi+blocksizeIJ); 
bj < cu; bj+=blocksizeIJ )
                        {
@@ -379,13 +380,14 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, 
ix+=n, uix+=k )
                                        for( int j=bj, vix=bj*k; j<bjmin; j++, 
vix+=k)
                                                if( a[ix+j] != 0 ) {
-                                                       //int cix = (type == 
OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
                                                        if(type == 
OutProdType.CELLWISE_OUTER_PRODUCT)
                                                                c[ix+j] = 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j );
                                                        else
-                                                               c[0]  += 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);
+                                                               sum += 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);
                                                }
                        }
+               if( type != OutProdType.CELLWISE_OUTER_PRODUCT )
+                       c[0] = sum;
        }
        
        private void executeSparse(SparseBlock sblock,  double[] u, double[] v, 
double[][] b, double[] scalars,

Reply via email to