Repository: systemml Updated Branches: refs/heads/master 16e803f2a -> 130096893
[SYSTEMML-1836] Reduced GC overhead codegen rowwise ops (static buffer) Scripts like Kmeans and Mlogreg showed unnecessarily large GC overhead when run with codegen enabled. These scripts heavily rely on rowwise fused operators, which already used a thread-local buffer for row intermediates (where the size of this buffer is derived from the cplan, usually <10). However, this ring buffer used a poor implementation based on linked lists, which created new objects per vector allocation (i.e., per row intermediate). This patch changes this implementation to a static array ring buffer, which significantly improves end-to-end performance. For example, here are the results for Kmeans and Mlogreg on a 10Mx10 scenario: Baselines w/o codegen: Kmeans 1,399s, Mlogreg 507s. Kmeans w/ codegen: 466s (102s GC) -> 326s (13s GC); Mlogreg w/ codegen: 196s (37s GC) -> 134s (6s GC). Furthermore, this patch also cleans up the statistics collection (collect all outside the loop and only for top-level problems) and the statistics output formatting (misaligned outputs), and introduces a new util function for integer powers 2^x in order to increase readability while leveraging the performance benefits of simple bit shifts. 
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/13009689 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/13009689 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/13009689 Branch: refs/heads/master Commit: 13009689339b652b9f080f776da46f6d63edc7ee Parents: 16e803f Author: Matthias Boehm <mboe...@gmail.com> Authored: Thu Aug 10 20:23:10 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Thu Aug 10 23:11:20 2017 -0700 ---------------------------------------------------------------------- .../codegen/opt/PlanSelectionFuseCostBased.java | 7 +- .../opt/PlanSelectionFuseCostBasedV2.java | 80 +++++++++----------- .../hops/codegen/opt/ReachabilityGraph.java | 11 +-- .../runtime/codegen/LibSpoofPrimitives.java | 69 ++++++++++------- .../sysml/runtime/codegen/SpoofRowwise.java | 10 ++- .../sysml/runtime/util/UtilFunctions.java | 7 +- .../java/org/apache/sysml/utils/Statistics.java | 63 ++++++++------- 7 files changed, 133 insertions(+), 114 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java index 985cc0f..4fcef6e 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java +++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBased.java @@ -376,8 +376,6 @@ public class PlanSelectionFuseCostBased extends PlanSelection //cost assignment on hops double C = getPlanCost(memo, partition, R, M, plan, computeCosts); - if( DMLScript.STATISTICS ) - Statistics.incrementCodegenFPlanCompile(1); if( 
LOG.isTraceEnabled() ) LOG.trace("Enum: "+Arrays.toString(plan)+" -> "+C); @@ -390,6 +388,11 @@ public class PlanSelectionFuseCostBased extends PlanSelection } } + if( DMLScript.STATISTICS ) { + Statistics.incrementCodegenEnumAll(len); + Statistics.incrementCodegenEnumEval(len); + } + //prune memo table wrt best plan and select plans HashSet<Long> visited = new HashSet<Long>(); for( Long hopID : R ) http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java index f818c06..60cfd0c 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java +++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java @@ -61,6 +61,7 @@ import org.apache.sysml.hops.rewrite.HopRewriteUtils; import org.apache.sysml.runtime.codegen.LibSpoofPrimitives; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence; +import org.apache.sysml.runtime.util.UtilFunctions; import org.apache.sysml.utils.Statistics; /** @@ -193,18 +194,15 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection //scan linearized search space, w/ skips for branch and bound pruning //and structural pruning (where we solve conditionally independent problems) //bestC is monotonically non-increasing and serves as the upper bound - long len = 1L << matPoints.length-off; + long len = UtilFunctions.pow(2, matPoints.length-off); boolean[] bestPlan = null; - long numEvalPlans = 0, numEvalPartialPlans = 0, numSkipPlans = 0; - + long numEvalPlans = 0, numEvalPartPlans = 0; + for( long i=0; i<len; i++ ) { //construct assignment boolean[] 
plan = createAssignment(matPoints.length-off, off, i); - if( bestPlan == null ) - bestPlan = plan; long pskip = 0; //skip after costing - - + //skip plans with structural pruning if( USE_STRUCTURAL_PRUNING && (rgraph!=null) && rgraph.isCutSet(plan) ) { //compute skip (which also acts as boundary for subproblems) @@ -226,13 +224,12 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection //skip plans with branch and bound pruning (cost) else if( USE_COST_PRUNING ) { double lbC = Math.max(costs._read, costs._compute) + costs._write - + getMaterializationCost(part, matPoints, memo, plan); + + getMaterializationCost(part, matPoints, memo, plan); if( lbC >= bestC ) { long skip = getNumSkipPlans(plan); if( LOG.isTraceEnabled() ) LOG.trace("Enum: Skip "+skip+" plans (by cost)."); i += skip - 1; - numSkipPlans += skip; continue; } } @@ -241,37 +238,34 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, bestC); if (LOG.isTraceEnabled()) LOG.trace("Enum: " + Arrays.toString(plan) + " -> " + C); - if( C == Double.POSITIVE_INFINITY ) { - numEvalPartialPlans++; - } else { - numEvalPlans++; - //cost comparisons - if( C < bestC ) { - bestC = C; - bestPlan = plan; - if( LOG.isTraceEnabled() ) - LOG.trace("Enum: Found new best plan."); - } + numEvalPartPlans += (C==Double.POSITIVE_INFINITY) ? 
1 : 0; + numEvalPlans++; + + //cost comparisons + if( bestPlan == null || C < bestC ) { + bestC = C; + bestPlan = plan; + if( LOG.isTraceEnabled() ) + LOG.trace("Enum: Found new best plan."); } - + //post skipping i += pskip; - numSkipPlans += pskip; if( pskip !=0 && LOG.isTraceEnabled() ) LOG.trace("Enum: Skip "+pskip+" plans (by structure)."); } if( DMLScript.STATISTICS ) { - Statistics.incrementCodegenFPlanCompile(numEvalPlans); - Statistics.incrementCodegenFPlanPartialCost(numEvalPartialPlans); - Statistics.incrementCodegenFPlanSkip(numSkipPlans); + Statistics.incrementCodegenEnumAll((rgraph!=null)?len:0); + Statistics.incrementCodegenEnumEval(numEvalPlans); + Statistics.incrementCodegenEnumEvalP(numEvalPartPlans); } if( LOG.isTraceEnabled() ) LOG.trace("Enum: Optimal plan: "+Arrays.toString(bestPlan)); //copy best plan w/o fixed offset plan - assert bestPlan != null; - return Arrays.copyOfRange(bestPlan, off, bestPlan.length); + return (bestPlan==null) ? new boolean[matPoints.length-off] : + Arrays.copyOfRange(bestPlan, off, bestPlan.length); } private static boolean[] createAssignment(int len, int off, long pos) { @@ -279,7 +273,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection Arrays.fill(ret, 0, off, true); long tmp = pos; for( int i=0; i<len; i++ ) { - long mask = 1L << len-i-1; + long mask = UtilFunctions.pow(2, len-i-1); ret[off+i] = tmp >= mask; tmp %= mask; } @@ -288,7 +282,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection private static long getNumSkipPlans(boolean[] plan) { int pos = ArrayUtils.lastIndexOf(plan, true); - return 1L << plan.length-pos-1; + return UtilFunctions.pow(2, plan.length-pos-1); } private static double getMaterializationCost(PlanPartition part, InterestingPoint[] M, CPlanMemoTable memo, boolean[] plan) { @@ -771,7 +765,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection private static double getPlanCost(CPlanMemoTable memo, PlanPartition part, InterestingPoint[] 
matPoints,boolean[] plan, HashMap<Long, Double> computeCosts, - final double bestC) + final double costBound) { //high level heuristic: every hop or fused operator has the following cost: //WRITE + max(COMPUTE, READ), where WRITE costs are given by the output size, @@ -783,26 +777,22 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection int rem = part.getRoots().size(); for( Long hopID : part.getRoots() ) { costs += rGetPlanCosts(memo, memo.getHopRefs().get(hopID), - visited, part, matPoints, plan, computeCosts, null, null, bestC - costs); - rem--; - // stop early if we exceed bestC - if( costs >= bestC && rem > 0 ) { - costs = Double.POSITIVE_INFINITY; - break; - } + visited, part, matPoints, plan, computeCosts, null, null, costBound-costs); + if( costs >= costBound && --rem > 0 ) //stop early + return Double.POSITIVE_INFINITY; } return costs; } private static double rGetPlanCosts(CPlanMemoTable memo, final Hop current, HashSet<VisitMarkCost> visited, PlanPartition part, InterestingPoint[] matPoints, boolean[] plan, HashMap<Long, Double> computeCosts, - CostVector costsCurrent, TemplateType currentType, final double costBudget) + CostVector costsCurrent, TemplateType currentType, final double costBound) { final long currentHopId = current.getHopID(); //memoization per hop id and cost vector to account for redundant //computation without double counting materialized results or compute //costs of complex operation DAGs within a single fused operator - if( !visited.add(new VisitMarkCost(current.getHopID(), + if( !visited.add(new VisitMarkCost(currentHopId, (costsCurrent==null || currentType==TemplateType.MAGG)?0:costsCurrent.ID)) ) return 0; //already existing @@ -839,8 +829,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection for( int i=1; i<3; i++ ) { if( !best.isPlanRef(i) ) continue; costs += rGetPlanCosts(memo, memo.getHopRefs().get(best.input(i)), visited, - part, matPoints, plan, computeCosts, costVect, TemplateType.MAGG, 
costBudget - costs); - if( costs >= costBudget ) + part, matPoints, plan, computeCosts, costVect, TemplateType.MAGG, costBound-costs); + if( costs >= costBound ) return Double.POSITIVE_INFINITY; } //skip other multi-agg roots @@ -856,17 +846,17 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection Hop c = current.getInput().get(i); if( best!=null && best.isPlanRef(i) ) costs += rGetPlanCosts(memo, c, visited, part, matPoints, - plan, computeCosts, costVect, best.type, costBudget - costs); + plan, computeCosts, costVect, best.type, costBound-costs); else if( best!=null && isImplicitlyFused(current, i, best.type) ) costVect.addInputSize(c.getInput().get(0).getHopID(), getSize(c)); else { //include children and I/O costs if( part.getPartition().contains(c.getHopID()) ) costs += rGetPlanCosts(memo, c, visited, part, matPoints, - plan, computeCosts, null, null, costBudget - costs); + plan, computeCosts, null, null, costBound-costs); if( costVect != null && c.getDataType().isMatrix() ) costVect.addInputSize(c.getHopID(), getSize(c)); } - if( costs >= costBudget ) + if( costs >= costBound ) return Double.POSITIVE_INFINITY; } @@ -889,7 +879,7 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection //add costs for non-partition read in the middle of fused operator else if( part.getExtConsumed().contains(current.getHopID()) ) { costs += rGetPlanCosts(memo, current, visited, part, matPoints, plan, - computeCosts, null, null, costBudget - costs); + computeCosts, null, null, costBound - costs); } //sanity check non-negative costs http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java index fb7c8d9..0c829e8 100644 --- 
a/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java +++ b/src/main/java/org/apache/sysml/hops/codegen/opt/ReachabilityGraph.java @@ -33,6 +33,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.sysml.hops.Hop; import org.apache.sysml.hops.codegen.template.CPlanMemoTable; import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence; +import org.apache.sysml.runtime.util.UtilFunctions; import org.apache.sysml.hops.codegen.opt.PlanSelection.VisitMarkCost; /** @@ -175,7 +176,7 @@ public class ReachabilityGraph for( CutSet cs : _cutSets ) if( isCutSet(cs, plan) ) { int pos = cs.posCut[cs.posCut.length-1]; - return 1L << plan.length-pos-1; + return UtilFunctions.pow(2, plan.length-pos-1); } throw new RuntimeException("Failed to compute " + "number of skip plans for plan without cutset."); @@ -240,11 +241,11 @@ public class ReachabilityGraph if( !CollectionUtils.containsAny(part1, part2) && !part1.isEmpty() && !part2.isEmpty()) { //score cutsets (smaller is better) - double base = 1L << _matPoints.size(); - double numComb = 1L << cand.size(); + double base = UtilFunctions.pow(2, _matPoints.size()); + double numComb = UtilFunctions.pow(2, cand.size()); double score = (numComb-1)/numComb * base - + 1/numComb * (1L << part1.size()) - + 1/numComb * (1L << part2.size()); + + 1/numComb * UtilFunctions.pow(2, part1.size()) + + 1/numComb * UtilFunctions.pow(2, part2.size()); //construct cutset cutSets.add(Pair.of(new CutSet( http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java index 83ee425..80586b1 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java +++ 
b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java @@ -20,8 +20,6 @@ package org.apache.sysml.runtime.codegen; import java.util.Arrays; -import java.util.Iterator; -import java.util.LinkedList; import org.apache.commons.math3.util.FastMath; import org.apache.sysml.runtime.functionobjects.IntegerDivide; @@ -42,8 +40,8 @@ public class LibSpoofPrimitives //global pool of reusable vectors, individual operations set up their own thread-local //ring buffers of reusable vectors with specific number of vectors and vector sizes - private static ThreadLocal<LinkedList<double[]>> memPool = new ThreadLocal<LinkedList<double[]>>() { - @Override protected LinkedList<double[]> initialValue() { return new LinkedList<double[]>(); } + private static ThreadLocal<VectorBuffer> memPool = new ThreadLocal<VectorBuffer>() { + @Override protected VectorBuffer initialValue() { return new VectorBuffer(0,0,0); } }; // forwarded calls to LibMatrixMult @@ -1444,13 +1442,7 @@ public class LibSpoofPrimitives } public static void setupThreadLocalMemory(int numVectors, int len, int len2) { - LinkedList<double[]> list = new LinkedList<double[]>(); - if( len2 >= 0 ) - for( int i=0; i<numVectors; i++ ) - list.addLast(new double[len2]); - for( int i=0; i<numVectors; i++ ) - list.addLast(new double[len]); - memPool.set(list); + memPool.set(new VectorBuffer(numVectors, len, len2)); } public static void cleanupThreadLocalMemory() { @@ -1462,29 +1454,52 @@ public class LibSpoofPrimitives } protected static double[] allocVector(int len, boolean reset, double resetVal) { - LinkedList<double[]> list = memPool.get(); + VectorBuffer buff = memPool.get(); - //find and remove vector with matching len - double[] vect = null; - Iterator<double[]> iter = list.iterator(); - while( iter.hasNext() ) { - double[] tmp = iter.next(); - if( tmp.length == len ) { - vect = tmp; - iter.remove(); - break; - } - } - - //allocate new vector or re-queue if required + //find next matching vector in ring 
buffer or + //allocate new vector if required + double[] vect = buff.next(len); if( vect == null ) vect = new double[len]; - else - list.addLast(vect); //reset vector if required if( reset ) Arrays.fill(vect, resetVal); return vect; } + + /** + * Simple ring buffer of allocated vectors, where + * vectors of different sizes are interspersed. + */ + private static class VectorBuffer { + private final double[][] _data; + private int _pos; + private int _len1; + private int _len2; + + public VectorBuffer(int num, int len1, int len2) { + int lnum = (len2 > 0) ? 2*num : num; + _data = new double[lnum][]; + for( int i=0; i<num; i++ ) + if( lnum > num ) { + _data[2*i] = new double[len1]; + _data[2*i+1] = new double[len2]; + } + else { + _data[i] = new double[len1]; + } + _pos = -1; + _len1 = len1; + _len2 = len2; + } + public double[] next(int len) { + if( _len1!=len && _len2!=len ) + return null; + do { + _pos = (_pos+1>=_data.length) ? 0 : _pos+1; + } while( _data[_pos].length!=len ); + return _data[_pos]; + } + } } http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index abe2e78..9f47733 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -122,7 +122,7 @@ public abstract class SpoofRowwise extends SpoofOperator //result allocation and preparations final int m = inputs.get(0).getNumRows(); final int n = inputs.get(0).getNumColumns(); - final int n2 = _type.isRowTypeB1() ? + final int n2 = _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? 
getMinColsMatrixSideInputs(inputs) : -1; if( !aggIncr || !out.isAllocated() ) allocateOutputMatrix(m, n, n2, out); @@ -170,7 +170,7 @@ public abstract class SpoofRowwise extends SpoofOperator //result allocation and preparations final int m = inputs.get(0).getNumRows(); final int n = inputs.get(0).getNumColumns(); - final int n2 = _type.isRowTypeB1() ? + final int n2 = _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? getMinColsMatrixSideInputs(inputs) : -1; allocateOutputMatrix(m, n, n2, out); @@ -217,6 +217,12 @@ public abstract class SpoofRowwise extends SpoofOperator } } + public static boolean hasMatrixSideInput(ArrayList<MatrixBlock> inputs) { + return IntStream.range(1, inputs.size()) + .mapToObj(i -> inputs.get(i)) + .anyMatch(in -> in.getNumColumns()>1); + } + private static int getMinColsMatrixSideInputs(ArrayList<MatrixBlock> inputs) { //For B1 types, get the output number of columns as the minimum //number of columns of side input matrices other than vectors. http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java index cec0fb0..891dbc9 100644 --- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java @@ -87,10 +87,15 @@ public class UtilFunctions public static int nextIntPow2( int in ) { int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1); - long pow2 = (long) Math.pow(2, expon); + long pow2 = pow(2, expon); return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : pow2); } + public static long pow(int base, int exp) { + return (base==2 && 0 <= exp && exp < 63) ? 
+ 1L << exp : (long)Math.pow(base, exp); + } + /** * Computes the 1-based block index based on the global cell index and block size meta * data. See computeCellIndex for the inverse operation. http://git-wip-us.apache.org/repos/asf/systemml/blob/13009689/src/main/java/org/apache/sysml/utils/Statistics.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java index b294ba6..5ff2762 100644 --- a/src/main/java/org/apache/sysml/utils/Statistics.java +++ b/src/main/java/org/apache/sysml/utils/Statistics.java @@ -76,11 +76,11 @@ public class Statistics private static final LongAdder codegenCompileTime = new LongAdder(); //in nano private static final LongAdder codegenClassCompileTime = new LongAdder(); //in nano private static final LongAdder codegenHopCompile = new LongAdder(); //count - private static final LongAdder codegenFPlanCompile = new LongAdder(); //count - private static final LongAdder codegenFPlanPartialCost = new LongAdder(); //count - private static final LongAdder codegenFPlanSkip = new LongAdder(); //count private static final LongAdder codegenCPlanCompile = new LongAdder(); //count private static final LongAdder codegenClassCompile = new LongAdder(); //count + private static final LongAdder codegenEnumAll = new LongAdder(); //count + private static final LongAdder codegenEnumEval = new LongAdder(); //count + private static final LongAdder codegenEnumEvalP = new LongAdder(); //count private static final LongAdder codegenPlanCacheHits = new LongAdder(); //count private static final LongAdder codegenPlanCacheTotal = new LongAdder(); //count @@ -259,14 +259,14 @@ public class Statistics codegenCPlanCompile.add(delta); } - public static void incrementCodegenFPlanCompile(long delta) { - codegenFPlanCompile.add(delta); + public static void incrementCodegenEnumAll(long delta) { + codegenEnumAll.add(delta); } - public static 
void incrementCodegenFPlanPartialCost(long delta) { - codegenFPlanPartialCost.add(delta); + public static void incrementCodegenEnumEval(long delta) { + codegenEnumEval.add(delta); } - public static void incrementCodegenFPlanSkip(long delta) { - codegenFPlanSkip.add(delta); + public static void incrementCodegenEnumEvalP(long delta) { + codegenEnumEvalP.add(delta); } public static void incrementCodegenClassCompile() { @@ -297,14 +297,14 @@ public class Statistics return codegenCPlanCompile.longValue(); } - public static long getCodegenFPlanCompile() { - return codegenFPlanCompile.longValue(); + public static long getCodegenEnumAll() { + return codegenEnumAll.longValue(); } - public static long getCodegenFPlanPartialCost() { - return codegenFPlanPartialCost.longValue(); + public static long getCodegenEnumEval() { + return codegenEnumEval.longValue(); } - public static long getCodegenFPlanSkip() { - return codegenFPlanSkip.longValue(); + public static long getCodegenEnumEvalP() { + return codegenEnumEvalP.longValue(); } public static long getCodegenClassCompile() { @@ -400,11 +400,11 @@ public class Statistics funRecompileTime.reset(); codegenHopCompile.reset(); - codegenFPlanCompile.reset(); - codegenFPlanPartialCost.reset(); - codegenFPlanSkip.reset(); codegenCPlanCompile.reset(); codegenClassCompile.reset(); + codegenEnumAll.reset(); + codegenEnumEval.reset(); + codegenEnumEvalP.reset(); codegenCompileTime.reset(); codegenClassCompileTime.reset(); @@ -741,10 +741,10 @@ public class Statistics sb.append("SystemML Statistics:\n"); if( DMLScript.STATISTICS ) { - sb.append("Total elapsed time:\t\t\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec - sb.append("Total compilation time:\t\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec + sb.append("Total elapsed time:\t\t" + String.format("%.3f", (getCompileTime()+getRunTime())*1e-9) + " sec.\n"); // nanoSec --> sec + sb.append("Total 
compilation time:\t\t" + String.format("%.3f", getCompileTime()*1e-9) + " sec.\n"); // nanoSec --> sec } - sb.append("Total execution time:\t\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec + sb.append("Total execution time:\t\t" + String.format("%.3f", getRunTime()*1e-9) + " sec.\n"); // nanoSec --> sec if( OptimizerUtils.isSparkExecutionMode() ) { if( DMLScript.STATISTICS ) //moved into stats on Shiv's request sb.append("Number of compiled Spark inst:\t" + getNoOfCompiledSPInst() + ".\n"); @@ -780,29 +780,28 @@ public class Statistics } sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + CacheStatistics.displayHits() + ".\n"); - sb.append("Cache writes (WB, FS, HDFS): \t" + CacheStatistics.displayWrites() + ".\n"); + sb.append("Cache writes (WB, FS, HDFS):\t" + CacheStatistics.displayWrites() + ".\n"); sb.append("Cache times (ACQr/m, RLS, EXP):\t" + CacheStatistics.displayTime() + " sec.\n"); sb.append("HOP DAGs recompiled (PRED, SB):\t" + getHopRecompiledPredDAGs() + "/" + getHopRecompiledSBDAGs() + ".\n"); - sb.append("HOP DAGs recompile time: \t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n"); + sb.append("HOP DAGs recompile time:\t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n"); if( getFunRecompiles()>0 ) { sb.append("Functions recompiled:\t\t" + getFunRecompiles() + ".\n"); sb.append("Functions recompile time:\t" + String.format("%.3f", ((double)getFunRecompileTime())/1000000000) + " sec.\n"); } if( ConfigurationManager.isCodegenEnabled() ) { - sb.append("Codegen compile (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/" + sb.append("Codegen compile (DAG,CP,JC):\t" + getCodegenDAGCompile() + "/" + getCodegenCPlanCompile() + "/" + getCodegenClassCompile() + ".\n"); - sb.append("Codegen enum (full,part,skip):\t" + getCodegenFPlanCompile() + "/" - + getCodegenFPlanPartialCost() + "/" - + getCodegenFPlanSkip() + ".\n"); + sb.append("Codegen enum (All,Eval,EvalP):\t" + 
getCodegenEnumAll() + "/" + + getCodegenEnumEval() + "/" + getCodegenEnumEvalP() + ".\n"); sb.append("Codegen compile times (DAG,JC):\t" + String.format("%.3f", (double)getCodegenCompileTime()/1000000000) + "/" + String.format("%.3f", (double)getCodegenClassCompileTime()/1000000000) + " sec.\n"); - sb.append("Codegen plan cache hits: \t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n"); + sb.append("Codegen plan cache hits:\t" + getCodegenPlanCacheHits() + "/" + getCodegenPlanCacheTotal() + ".\n"); } if( OptimizerUtils.isSparkExecutionMode() ){ String lazy = SparkExecutionContext.isLazySparkContextCreation() ? "(lazy)" : "(eager)"; sb.append("Spark ctx create time "+lazy+":\t"+ String.format("%.3f", ((double)sparkCtxCreateTime)*1e-9) + " sec.\n" ); // nanoSec --> sec - sb.append("Spark trans counts(par,bc,col):\t" + + sb.append("Spark trans counts (par,bc,col):" + String.format("%d/%d/%d.\n", sparkParallelizeCount.longValue(), sparkBroadcastCount.longValue(), sparkCollectCount.longValue())); sb.append("Spark trans times (par,bc,col):\t" + @@ -819,9 +818,9 @@ public class Statistics sb.append("ParFor total update in-place:\t" + lTotalUIPVar + "/" + lTotalLixUIP + "/" + lTotalLix + "\n"); } - sb.append("Total JIT compile time:\t\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n"); - sb.append("Total JVM GC count:\t\t\t\t" + getJVMgcCount() + ".\n"); - sb.append("Total JVM GC time:\t\t\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n"); + sb.append("Total JIT compile time:\t\t" + ((double)getJITCompileTime())/1000 + " sec.\n"); + sb.append("Total JVM GC count:\t\t" + getJVMgcCount() + ".\n"); + sb.append("Total JVM GC time:\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n"); LibMatrixDNN.appendStatistics(sb); sb.append("Heavy hitter instructions:\n" + getHeavyHitters(maxHeavyHitters)); }