Repository: systemml Updated Branches: refs/heads/master 82a1d3468 -> 375ad1a14
[SYSTEMML-2409] Improved matrix histogram (add meta data exploitation) This patch adds further meta data (the number of rows/cols with 1 non-zero) to the matrix histogram sparsity estimator and exploits this information during sparsity estimation. Specifically, in mixed mode, the exact and approximate fractions are known not to collide. Hence, we now tighten the area the approximate fraction targets which improves the estimate by more precisely estimating collisions. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/375ad1a1 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/375ad1a1 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/375ad1a1 Branch: refs/heads/master Commit: 375ad1a143020bdf73fe96e292681adcfc0177eb Parents: 82a1d34 Author: Matthias Boehm <[email protected]> Authored: Thu Jul 19 23:05:01 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu Jul 19 23:05:01 2018 -0700 ---------------------------------------------------------------------- .../hops/estim/EstimatorMatrixHistogram.java | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/375ad1a1/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java index daac3df..96f5ab7 100644 --- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java +++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java @@ -98,8 +98,9 @@ public class EstimatorMatrixHistogram extends SparsityEstimator //special case, with hybrid exact and approximate output else if(h1.cNnz1e!=null && h2.rNnz1e != null) { //note: normally h1.getRows()*h2.getCols() would define mnOut - //but by leveraging the knowledge of empty rows/cols we do better - int mnOut = h1.rNonEmpty * h2.cNonEmpty; + //but by leveraging the knowledge of rows/cols w/ <=1 nnz, we account + //that exact and approximate fractions touch different areas + int mnOut = (h1.rNonEmpty-h1.rN1) * (h2.cNonEmpty-h2.cN1); double spOutRest = 0; for( int j=0; j<h1.getCols(); j++ ) { //exact fractions, w/o double counting @@ -137,16 +138,16 @@ public class EstimatorMatrixHistogram extends SparsityEstimator } private static class MatrixHistogram { + // count vectors (the histogram) private final int[] rNnz; //nnz per row private int[] rNnz1e = null; //nnz per row for cols w/ <= 1 non-zeros private final int[] cNnz; //nnz per col private int[] cNnz1e = null; //nnz per col for rows w/ <= 1 non-zeros - private final int rMaxNnz; //max nnz per row - private final int cMaxNnz; //max nnz per col - private final int rNonEmpty; //number of non-empty rows (an empty row has nnz=0) - private final int cNonEmpty; //number of non-empty cols (an empty col has nnz=0) - private final int rNdiv2; //number of rows with nnz > #cols/2 - private final int cNdiv2; //number of cols with nnz > #rows/2 + // additional summary statistics + private final int rMaxNnz, cMaxNnz; //max nnz per row/row + private final int rN1, cN1; //number of rows/cols with nnz=1 + private final int rNonEmpty, cNonEmpty; //number of non-empty rows/cols (w/ empty is nnz=0) + private final int rNdiv2, cNdiv2; //number of rows/cols with nnz > #cols/2 and #rows/2 public MatrixHistogram(MatrixBlock in, boolean useExcepts) { // 1) allocate basic synopsis @@ -184,6 +185,8 @@ public class EstimatorMatrixHistogram extends SparsityEstimator // 3) compute meta data synopsis rMaxNnz = Arrays.stream(rNnz).max().orElse(0); cMaxNnz = Arrays.stream(cNnz).max().orElse(0); + rN1 = (int) Arrays.stream(rNnz).filter(item -> item == 1).count(); + cN1 = (int) Arrays.stream(cNnz).filter(item -> item == 1).count(); rNonEmpty = (int) Arrays.stream(rNnz).filter(v-> v!=0).count(); cNonEmpty = (int) Arrays.stream(cNnz).filter(v-> v!=0).count(); rNdiv2 = (int) Arrays.stream(rNnz).filter(item -> item > getCols()/2).count(); @@ -232,6 +235,7 @@ public class EstimatorMatrixHistogram extends SparsityEstimator cNnz1e = c1e; rMaxNnz = rmax; cMaxNnz = cmax; + rN1 = cN1 = -1; rNonEmpty = cNonEmpty = -1; rNdiv2 = cNdiv2 = -1; }
