Repository: systemml
Updated Branches:
  refs/heads/master 16e803f2a -> 130096893


[SYSTEMML-1836] Reduced GC overhead codegen rowwise ops (static buffer) 

Scripts like Kmeans and Mlogreg showed unnecessarily large GC overhead
when run with codegen enabled. These scripts heavily rely on rowwise
fused operators, which already used a thread-local buffer for row
intermediates (where the size of this buffer is derived from the cplan,
usually <10). However, this ring buffer used a poor implementation based
on linked lists, which created new objects per vector allocation (i.e.,
per row intermediate). 

This patch changes this implementation to a static array ring buffer,
which significantly improved end-to-end performance. For example, here
are the results for Kmeans and Mlogreg on a 10Mx10 scenario:

Baselines w/o codegen: Kmeans 1,399s, Mlogreg 507s.
Kmeans w/ codegen: 466s (102s GC) -> 326s (13s GC)
Mlogreg w/ codegen: 196s (37s GC) -> 134s (6s GC)

Furthermore, this patch also cleans up the statistics collection
(collect all outside loop and only for top-level problems), statistics
output formatting (mis-aligned outputs), and introduces a new util
function for integer power 2^x in order to increase readability while
leveraging the performance benefits of simple shifts.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/13009689
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/13009689
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/13009689

Branch: refs/heads/master
Commit: 13009689339b652b9f080f776da46f6d63edc7ee
Parents: 16e803f
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Thu Aug 10 20:23:10 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Thu Aug 10 23:11:20 2017 -0700

----------------------------------------------------------------------
 .../codegen/opt/PlanSelectionFuseCostBased.java |  7 +-
 .../opt/PlanSelectionFuseCostBasedV2.java       | 80 +++++++++-----------
 .../hops/codegen/opt/ReachabilityGraph.java     | 11 +--
 .../runtime/codegen/LibSpoofPrimitives.java     | 69 ++++++++++-------
 .../sysml/runtime/codegen/SpoofRowwise.java     | 10 ++-
 .../sysml/runtime/util/UtilFunctions.java       |  7 +-
 .../java/org/apache/sysml/utils/Statistics.java | 63 ++++++++-------
 7 files changed, 133 insertions(+), 114 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java
 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java
index 985cc0f..4fcef6e 100644
--- 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java
+++ 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java
@@ -376,8 +376,6 @@ public class PlanSelectionFuseCostBased extends 
PlanSelection
                                
                                //cost assignment on hops
                                double C = getPlanCost(memo, partition, R, M, 
plan, computeCosts);
-                               if( DMLScript.STATISTICS )
-                                       
Statistics.incrementCodegenFPlanCompile(1);
                                if( LOG.isTraceEnabled() )
                                        LOG.trace("Enum: 
"+Arrays.toString(plan)+" -> "+C);
                                
@@ -390,6 +388,11 @@ public class PlanSelectionFuseCostBased extends 
PlanSelection
                                }
                        }
                        
+                       if( DMLScript.STATISTICS ) {
+                               Statistics.incrementCodegenEnumAll(len);
+                               Statistics.incrementCodegenEnumEval(len);
+                       }
+                       
                        //prune memo table wrt best plan and select plans
                        HashSet<Long> visited = new HashSet<Long>();
                        for( Long hopID : R )

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index f818c06..60cfd0c 100644
--- 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -61,6 +61,7 @@ import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;
 import 
org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
+import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.utils.Statistics;
 
 /**
@@ -193,18 +194,15 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                //scan linearized search space, w/ skips for branch and bound 
pruning
                //and structural pruning (where we solve conditionally 
independent problems)
                //bestC is monotonically non-increasing and serves as the upper 
bound
-               long len = 1L << matPoints.length-off;
+               long len = UtilFunctions.pow(2, matPoints.length-off);
                boolean[] bestPlan = null;
-               long numEvalPlans = 0, numEvalPartialPlans = 0, numSkipPlans = 
0;
-
+               long numEvalPlans = 0, numEvalPartPlans = 0;
+               
                for( long i=0; i<len; i++ ) {
                        //construct assignment
                        boolean[] plan = createAssignment(matPoints.length-off, 
off, i);
-                       if( bestPlan == null )
-                               bestPlan = plan;
                        long pskip = 0; //skip after costing
-
-
+                       
                        //skip plans with structural pruning
                        if( USE_STRUCTURAL_PRUNING && (rgraph!=null) && 
rgraph.isCutSet(plan) ) {
                                //compute skip (which also acts as boundary for 
subproblems)
@@ -226,13 +224,12 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        //skip plans with branch and bound pruning (cost)
                        else if( USE_COST_PRUNING ) {
                                double lbC = Math.max(costs._read, 
costs._compute) + costs._write
-                                               + getMaterializationCost(part, 
matPoints, memo, plan);
+                                       + getMaterializationCost(part, 
matPoints, memo, plan);
                                if( lbC >= bestC ) {
                                        long skip = getNumSkipPlans(plan);
                                        if( LOG.isTraceEnabled() )
                                                LOG.trace("Enum: Skip "+skip+" 
plans (by cost).");
                                        i += skip - 1;
-                                       numSkipPlans += skip;
                                        continue;
                                }
                        }
@@ -241,37 +238,34 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        double C = getPlanCost(memo, part, matPoints, plan, 
costs._computeCosts, bestC);
                        if (LOG.isTraceEnabled())
                                LOG.trace("Enum: " + Arrays.toString(plan) + " 
-> " + C);
-                       if( C == Double.POSITIVE_INFINITY ) {
-                               numEvalPartialPlans++;
-                       } else {
-                               numEvalPlans++;
-                               //cost comparisons
-                               if( C < bestC ) {
-                                       bestC = C;
-                                       bestPlan = plan;
-                                       if( LOG.isTraceEnabled() )
-                                               LOG.trace("Enum: Found new best 
plan.");
-                               }
+                       numEvalPartPlans += (C==Double.POSITIVE_INFINITY) ? 1 : 
0;
+                       numEvalPlans++;
+                       
+                       //cost comparisons
+                       if( bestPlan == null || C < bestC ) {
+                               bestC = C;
+                               bestPlan = plan;
+                               if( LOG.isTraceEnabled() )
+                                       LOG.trace("Enum: Found new best plan.");
                        }
-
+                       
                        //post skipping
                        i += pskip;
-                       numSkipPlans += pskip;
                        if( pskip !=0 && LOG.isTraceEnabled() )
                                LOG.trace("Enum: Skip "+pskip+" plans (by 
structure).");
                }
                
                if( DMLScript.STATISTICS ) {
-                       Statistics.incrementCodegenFPlanCompile(numEvalPlans);
-                       
Statistics.incrementCodegenFPlanPartialCost(numEvalPartialPlans);
-                       Statistics.incrementCodegenFPlanSkip(numSkipPlans);
+                       
Statistics.incrementCodegenEnumAll((rgraph!=null)?len:0);
+                       Statistics.incrementCodegenEnumEval(numEvalPlans);
+                       Statistics.incrementCodegenEnumEvalP(numEvalPartPlans);
                }
                if( LOG.isTraceEnabled() )
                        LOG.trace("Enum: Optimal plan: 
"+Arrays.toString(bestPlan));
                
                //copy best plan w/o fixed offset plan
-               assert bestPlan != null;
-               return Arrays.copyOfRange(bestPlan, off, bestPlan.length);
+               return (bestPlan==null) ? new boolean[matPoints.length-off] :
+                       Arrays.copyOfRange(bestPlan, off, bestPlan.length);
        }
        
        private static boolean[] createAssignment(int len, int off, long pos) {
@@ -279,7 +273,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                Arrays.fill(ret, 0, off, true);
                long tmp = pos;
                for( int i=0; i<len; i++ ) {
-                       long mask = 1L << len-i-1;
+                       long mask = UtilFunctions.pow(2, len-i-1);
                        ret[off+i] = tmp >= mask;
                        tmp %= mask;
                }
@@ -288,7 +282,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
        
        private static long getNumSkipPlans(boolean[] plan) {
                int pos = ArrayUtils.lastIndexOf(plan, true);
-               return 1L << plan.length-pos-1;
+               return UtilFunctions.pow(2, plan.length-pos-1);
        }
        
        private static double getMaterializationCost(PlanPartition part, 
InterestingPoint[] M, CPlanMemoTable memo, boolean[] plan) {
@@ -771,7 +765,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
        
        private static double getPlanCost(CPlanMemoTable memo, PlanPartition 
part, 
                        InterestingPoint[] matPoints,boolean[] plan, 
HashMap<Long, Double> computeCosts,
-                       final double bestC)
+                       final double costBound)
        {
                //high level heuristic: every hop or fused operator has the 
following cost: 
                //WRITE + max(COMPUTE, READ), where WRITE costs are given by 
the output size, 
@@ -783,26 +777,22 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                int rem = part.getRoots().size();
                for( Long hopID : part.getRoots() ) {
                        costs += rGetPlanCosts(memo, 
memo.getHopRefs().get(hopID), 
-                               visited, part, matPoints, plan, computeCosts, 
null, null, bestC - costs);
-                       rem--;
-                       // stop early if we exceed bestC
-                       if( costs >= bestC && rem > 0 ) {
-                               costs = Double.POSITIVE_INFINITY;
-                               break;
-                       }
+                               visited, part, matPoints, plan, computeCosts, 
null, null, costBound-costs);
+                       if( costs >= costBound && --rem > 0 ) //stop early
+                               return Double.POSITIVE_INFINITY;
                }
                return costs;
        }
        
        private static double rGetPlanCosts(CPlanMemoTable memo, final Hop 
current, HashSet<VisitMarkCost> visited,
                        PlanPartition part, InterestingPoint[] matPoints, 
boolean[] plan, HashMap<Long, Double> computeCosts,
-                       CostVector costsCurrent, TemplateType currentType, 
final double costBudget)
+                       CostVector costsCurrent, TemplateType currentType, 
final double costBound)
        {
                final long currentHopId = current.getHopID();
                //memoization per hop id and cost vector to account for 
redundant
                //computation without double counting materialized results or 
compute
                //costs of complex operation DAGs within a single fused operator
-               if( !visited.add(new VisitMarkCost(current.getHopID(), 
+               if( !visited.add(new VisitMarkCost(currentHopId, 
                        (costsCurrent==null || 
currentType==TemplateType.MAGG)?0:costsCurrent.ID)) )
                        return 0; //already existing 
                
@@ -839,8 +829,8 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                                for( int i=1; i<3; i++ ) {
                                        if( !best.isPlanRef(i) ) continue;
                                        costs += rGetPlanCosts(memo, 
memo.getHopRefs().get(best.input(i)), visited, 
-                                               part, matPoints, plan, 
computeCosts, costVect, TemplateType.MAGG, costBudget - costs);
-                                       if( costs >= costBudget )
+                                               part, matPoints, plan, 
computeCosts, costVect, TemplateType.MAGG, costBound-costs);
+                                       if( costs >= costBound )
                                                return Double.POSITIVE_INFINITY;
                                }
                        //skip other multi-agg roots
@@ -856,17 +846,17 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        Hop c = current.getInput().get(i);
                        if( best!=null && best.isPlanRef(i) )
                                costs += rGetPlanCosts(memo, c, visited, part, 
matPoints,
-                                               plan, computeCosts, costVect, 
best.type, costBudget - costs);
+                                               plan, computeCosts, costVect, 
best.type, costBound-costs);
                        else if( best!=null && isImplicitlyFused(current, i, 
best.type) )
                                
costVect.addInputSize(c.getInput().get(0).getHopID(), getSize(c));
                        else { //include children and I/O costs
                                if( part.getPartition().contains(c.getHopID()) )
                                        costs += rGetPlanCosts(memo, c, 
visited, part, matPoints,
-                                               plan, computeCosts, null, null, 
costBudget - costs);
+                                               plan, computeCosts, null, null, 
costBound-costs);
                                if( costVect != null && 
c.getDataType().isMatrix() )
                                        costVect.addInputSize(c.getHopID(), 
getSize(c));
                        }
-                       if( costs >= costBudget )
+                       if( costs >= costBound )
                                return Double.POSITIVE_INFINITY;
                }
                
@@ -889,7 +879,7 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                //add costs for non-partition read in the middle of fused 
operator
                else if( part.getExtConsumed().contains(current.getHopID()) ) {
                        costs += rGetPlanCosts(memo, current, visited, part, 
matPoints, plan,
-                               computeCosts, null, null, costBudget - costs);
+                               computeCosts, null, null, costBound - costs);
                }
                
                //sanity check non-negative costs

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java 
b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
index fb7c8d9..0c829e8 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java
@@ -33,6 +33,7 @@ import org.apache.commons.lang3.tuple.Pair;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.codegen.template.CPlanMemoTable;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
+import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.hops.codegen.opt.PlanSelection.VisitMarkCost;
 
 /**
@@ -175,7 +176,7 @@ public class ReachabilityGraph
                for( CutSet cs : _cutSets )
                        if( isCutSet(cs, plan) ) {
                                int pos = cs.posCut[cs.posCut.length-1];        
                        
-                               return 1L << plan.length-pos-1;
+                               return UtilFunctions.pow(2, plan.length-pos-1);
                        }
                throw new RuntimeException("Failed to compute "
                        + "number of skip plans for plan without cutset.");
@@ -240,11 +241,11 @@ public class ReachabilityGraph
                        if( !CollectionUtils.containsAny(part1, part2) 
                                && !part1.isEmpty() && !part2.isEmpty()) {
                                //score cutsets (smaller is better)
-                               double base = 1L << _matPoints.size();
-                               double numComb = 1L << cand.size();
+                               double base = UtilFunctions.pow(2, 
_matPoints.size());
+                               double numComb = UtilFunctions.pow(2, 
cand.size());
                                double score = (numComb-1)/numComb * base
-                                       + 1/numComb * (1L << part1.size())
-                                       + 1/numComb * (1L << part2.size());
+                                       + 1/numComb * UtilFunctions.pow(2, 
part1.size())
+                                       + 1/numComb * UtilFunctions.pow(2, 
part2.size());
                                
                                //construct cutset
                                cutSets.add(Pair.of(new CutSet(

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java 
b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 83ee425..80586b1 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -20,8 +20,6 @@
 package org.apache.sysml.runtime.codegen;
 
 import java.util.Arrays;
-import java.util.Iterator;
-import java.util.LinkedList;
 
 import org.apache.commons.math3.util.FastMath;
 import org.apache.sysml.runtime.functionobjects.IntegerDivide;
@@ -42,8 +40,8 @@ public class LibSpoofPrimitives
        
        //global pool of reusable vectors, individual operations set up their 
own thread-local
        //ring buffers of reusable vectors with specific number of vectors and 
vector sizes 
-       private static ThreadLocal<LinkedList<double[]>> memPool = new 
ThreadLocal<LinkedList<double[]>>() {
-               @Override protected LinkedList<double[]> initialValue() { 
return new LinkedList<double[]>(); }
+       private static ThreadLocal<VectorBuffer> memPool = new 
ThreadLocal<VectorBuffer>() {
+               @Override protected VectorBuffer initialValue() { return new 
VectorBuffer(0,0,0); }
        };
        
        // forwarded calls to LibMatrixMult
@@ -1444,13 +1442,7 @@ public class LibSpoofPrimitives
        }
        
        public static void setupThreadLocalMemory(int numVectors, int len, int 
len2) {
-               LinkedList<double[]> list = new LinkedList<double[]>();
-               if( len2 >= 0 ) 
-                       for( int i=0; i<numVectors; i++ )
-                               list.addLast(new double[len2]);
-               for( int i=0; i<numVectors; i++ )
-                       list.addLast(new double[len]);
-               memPool.set(list);
+               memPool.set(new VectorBuffer(numVectors, len, len2));
        }
        
        public static void cleanupThreadLocalMemory() {
@@ -1462,29 +1454,52 @@ public class LibSpoofPrimitives
        }
        
        protected static double[] allocVector(int len, boolean reset, double 
resetVal) {
-               LinkedList<double[]> list = memPool.get(); 
+               VectorBuffer buff = memPool.get(); 
                
-               //find and remove vector with matching len 
-               double[] vect = null;
-               Iterator<double[]> iter = list.iterator();
-               while( iter.hasNext() ) {
-                       double[] tmp = iter.next();
-                       if( tmp.length == len ) {
-                               vect = tmp;
-                               iter.remove();
-                               break;
-                       }
-               }
-               
-               //allocate new vector or re-queue if required
+               //find next matching vector in ring buffer or
+               //allocate new vector if required
+               double[] vect = buff.next(len);
                if( vect == null )
                        vect = new double[len];
-               else 
-                       list.addLast(vect);
                
                //reset vector if required
                if( reset )
                        Arrays.fill(vect, resetVal);
                return vect;
        }
+       
+       /**
+        * Simple ring buffer of allocated vectors, where
+        * vectors of different sizes are interspersed.
+        */
+       private static class VectorBuffer {
+               private final double[][] _data;
+               private int _pos;
+               private int _len1;
+               private int _len2;
+               
+               public VectorBuffer(int num, int len1, int len2) {
+                       int lnum = (len2 > 0) ? 2*num : num;
+                       _data = new double[lnum][];
+                       for( int i=0; i<num; i++ )
+                               if( lnum > num ) {
+                                       _data[2*i] = new double[len1];
+                                       _data[2*i+1] = new double[len2];
+                               }
+                               else {
+                                       _data[i] = new double[len1];
+                               }
+                       _pos = -1;
+                       _len1 = len1;
+                       _len2 = len2;
+               }
+               public double[] next(int len) {
+                       if( _len1!=len && _len2!=len )
+                               return null;
+                       do {
+                               _pos = (_pos+1>=_data.length) ? 0 : _pos+1;
+                       } while( _data[_pos].length!=len );
+                       return _data[_pos];
+               }
+       }
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index abe2e78..9f47733 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -122,7 +122,7 @@ public abstract class SpoofRowwise extends SpoofOperator
                //result allocation and preparations
                final int m = inputs.get(0).getNumRows();
                final int n = inputs.get(0).getNumColumns();
-               final int n2 = _type.isRowTypeB1() ?
+               final int n2 = _type.isRowTypeB1() || 
hasMatrixSideInput(inputs) ?
                        getMinColsMatrixSideInputs(inputs) : -1;
                if( !aggIncr || !out.isAllocated() )
                        allocateOutputMatrix(m, n, n2, out);
@@ -170,7 +170,7 @@ public abstract class SpoofRowwise extends SpoofOperator
                //result allocation and preparations
                final int m = inputs.get(0).getNumRows();
                final int n = inputs.get(0).getNumColumns();
-               final int n2 = _type.isRowTypeB1() ?
+               final int n2 = _type.isRowTypeB1() || 
hasMatrixSideInput(inputs) ?
                        getMinColsMatrixSideInputs(inputs) : -1;
                allocateOutputMatrix(m, n, n2, out);
                
@@ -217,6 +217,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                }
        }
        
+       public static boolean hasMatrixSideInput(ArrayList<MatrixBlock> inputs) 
{
+               return IntStream.range(1, inputs.size())
+                       .mapToObj(i -> inputs.get(i))
+                       .anyMatch(in -> in.getNumColumns()>1);
+       }
+       
        private static int getMinColsMatrixSideInputs(ArrayList<MatrixBlock> 
inputs) {
                //For B1 types, get the output number of columns as the minimum
                //number of columns of side input matrices other than vectors.

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index cec0fb0..891dbc9 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -87,10 +87,15 @@ public class UtilFunctions
        
        public static int nextIntPow2( int in ) {
                int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1);
-               long pow2 = (long) Math.pow(2, expon);
+               long pow2 = pow(2, expon);
                return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : 
pow2);        
        }
        
+       public static long pow(int base, int exp) {
+               return (base==2 && 0 <= exp && exp < 63) ?
+                       1L << exp : (long)Math.pow(base, exp);
+       }
+       
        /**
         * Computes the 1-based block index based on the global cell index and 
block size meta
         * data. See computeCellIndex for the inverse operation.

http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java 
b/src/main/java/org/apache/sysml/utils/Statistics.java
index b294ba6..5ff2762 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -76,11 +76,11 @@ public class Statistics
        private static final LongAdder codegenCompileTime = new LongAdder(); 
//in nano
        private static final LongAdder codegenClassCompileTime = new 
LongAdder(); //in nano
        private static final LongAdder codegenHopCompile = new LongAdder(); 
//count
-       private static final LongAdder codegenFPlanCompile = new LongAdder(); 
//count
-       private static final LongAdder codegenFPlanPartialCost = new 
LongAdder(); //count
-       private static final LongAdder codegenFPlanSkip = new LongAdder(); 
//count
        private static final LongAdder codegenCPlanCompile = new LongAdder(); 
//count
        private static final LongAdder codegenClassCompile = new LongAdder(); 
//count
+       private static final LongAdder codegenEnumAll = new LongAdder(); //count
+       private static final LongAdder codegenEnumEval = new LongAdder(); 
//count
+       private static final LongAdder codegenEnumEvalP = new LongAdder(); 
//count
        private static final LongAdder codegenPlanCacheHits = new LongAdder(); 
//count
        private static final LongAdder codegenPlanCacheTotal = new LongAdder(); 
//count
        
@@ -259,14 +259,14 @@ public class Statistics
                codegenCPlanCompile.add(delta);
        }
        
-       public static void incrementCodegenFPlanCompile(long delta) {
-               codegenFPlanCompile.add(delta);
+       public static void incrementCodegenEnumAll(long delta) {
+               codegenEnumAll.add(delta);
        }
-       public static void incrementCodegenFPlanPartialCost(long delta) {
-               codegenFPlanPartialCost.add(delta);
+       public static void incrementCodegenEnumEval(long delta) {
+               codegenEnumEval.add(delta);
        }
-       public static void incrementCodegenFPlanSkip(long delta) {
-               codegenFPlanSkip.add(delta);
+       public static void incrementCodegenEnumEvalP(long delta) {
+               codegenEnumEvalP.add(delta);
        }
        
        public static void incrementCodegenClassCompile() {
@@ -297,14 +297,14 @@ public class Statistics
                return codegenCPlanCompile.longValue();
        }
        
-       public static long getCodegenFPlanCompile() {
-               return codegenFPlanCompile.longValue();
+       public static long getCodegenEnumAll() {
+               return codegenEnumAll.longValue();
        }
-       public static long getCodegenFPlanPartialCost() {
-               return codegenFPlanPartialCost.longValue();
+       public static long getCodegenEnumEval() {
+               return codegenEnumEval.longValue();
        }
-       public static long getCodegenFPlanSkip() {
-               return codegenFPlanSkip.longValue();
+       public static long getCodegenEnumEvalP() {
+               return codegenEnumEvalP.longValue();
        }
        
        public static long getCodegenClassCompile() {
@@ -400,11 +400,11 @@ public class Statistics
                funRecompileTime.reset();
                
                codegenHopCompile.reset();
-               codegenFPlanCompile.reset();
-               codegenFPlanPartialCost.reset();
-               codegenFPlanSkip.reset();
                codegenCPlanCompile.reset();
                codegenClassCompile.reset();
+               codegenEnumAll.reset();
+               codegenEnumEval.reset();
+               codegenEnumEvalP.reset();
                codegenCompileTime.reset();
                codegenClassCompileTime.reset();
                
@@ -741,10 +741,10 @@ public class Statistics
                
                sb.append("SystemML Statistics:\n");
                if( DMLScript.STATISTICS ) {
-                       sb.append("Total elapsed time:\t\t\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec
-                       sb.append("Total compilation time:\t\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec
+                       sb.append("Total elapsed time:\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec
+                       sb.append("Total compilation time:\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec
                }
-               sb.append("Total execution time:\t\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec
+               sb.append("Total execution time:\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec
                if( OptimizerUtils.isSparkExecutionMode() ) {
                        if( DMLScript.STATISTICS ) //moved into stats on Shiv's request
                                sb.append("Number of compiled Spark inst:\t" + getNoOfCompiledSPInst() + ".\n");
@@ -780,29 +780,28 @@ public class Statistics
                        }
                        
                        sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + CacheStatistics.displayHits() + ".\n");
-                       sb.append("Cache writes (WB, FS, HDFS):   \t" + CacheStatistics.displayWrites() + ".\n");
+                       sb.append("Cache writes (WB, FS, HDFS):\t" + CacheStatistics.displayWrites() + ".\n");
                        sb.append("Cache times (ACQr/m, RLS, EXP):\t" + CacheStatistics.displayTime() + " sec.\n");
                        sb.append("HOP DAGs recompiled (PRED, SB):\t" + getHopRecompiledPredDAGs() + "/" + getHopRecompiledSBDAGs() + ".\n");
-                       sb.append("HOP DAGs recompile time:       \t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
+                       sb.append("HOP DAGs recompile time:\t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
                        if( getFunRecompiles()>0 ) {
                                sb.append("Functions recompiled:\t\t" + getFunRecompiles() + ".\n");
                                sb.append("Functions recompile time:\t" + String.format("%.3f", ((double)getFunRecompileTime())/1000000000) + " sec.\n");       
                        }
                        if( ConfigurationManager.isCodegenEnabled() ) {
-                               sb.append("Codegen compile    (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/"
+                               sb.append("Codegen compile (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/"
                                                + getCodegenCPlanCompile() + "/" + getCodegenClassCompile() + ".\n");
-                               sb.append("Codegen enum  (full,part,skip):\t" + getCodegenFPlanCompile() + "/"
-                                               + getCodegenFPlanPartialCost() + "/"
-                                               + getCodegenFPlanSkip() + ".\n");
+                               sb.append("Codegen enum (All,Eval,EvalP):\t" + getCodegenEnumAll() + "/"
+                                               + getCodegenEnumEval() + "/" + getCodegenEnumEvalP() + ".\n");
                                sb.append("Codegen compile times (DAG,JC):\t" + String.format("%.3f", (double)getCodegenCompileTime()/1000000000) + "/" + 
                                                String.format("%.3f", (double)getCodegenClassCompileTime()/1000000000)  + " sec.\n");
-                               sb.append("Codegen plan cache hits:       \t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n");
+                               sb.append("Codegen plan cache hits:\t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n");
                        }
                        if( OptimizerUtils.isSparkExecutionMode() ){
                                String lazy = SparkExecutionContext.isLazySparkContextCreation() ? "(lazy)" : "(eager)";
                                sb.append("Spark ctx create time "+lazy+":\t"+
                                                String.format("%.3f", ((double)sparkCtxCreateTime)*1e-9)  + " sec.\n" ); // nanoSec --> sec
-                               sb.append("Spark trans counts(par,bc,col):\t" +
+                               sb.append("Spark trans counts (par,bc,col):" +
                                                String.format("%d/%d/%d.\n", sparkParallelizeCount.longValue(), 
                                                                sparkBroadcastCount.longValue(), sparkCollectCount.longValue()));
                                sb.append("Spark trans times (par,bc,col):\t" +
@@ -819,9 +818,9 @@ public class Statistics
                                sb.append("ParFor total update in-place:\t" + lTotalUIPVar + "/" + lTotalLixUIP + "/" + lTotalLix + "\n");
                        }
 
-                       sb.append("Total JIT compile time:\t\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n");
-                       sb.append("Total JVM GC count:\t\t\t\t" + getJVMgcCount() + ".\n");
-                       sb.append("Total JVM GC time:\t\t\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n");
+                       sb.append("Total JIT compile time:\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n");
+                       sb.append("Total JVM GC count:\t\t" + getJVMgcCount() + ".\n");
+                       sb.append("Total JVM GC time:\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n");
                        LibMatrixDNN.appendStatistics(sb);
                        sb.append("Heavy hitter instructions:\n" + getHeavyHitters(maxHeavyHitters));
                }

Reply via email to