Repository: systemml
Updated Branches:
  refs/heads/master 82a1d3468 -> 375ad1a14


[SYSTEMML-2409] Improved matrix histogram (add meta data exploitation)

This patch adds further meta data (the number of rows/cols with 1
non-zero) to the matrix histogram sparsity estimator and exploits this
information during sparsity estimation. Specifically, in mixed mode, the
exact and approximate fractions are known not to collide. Hence, we now
tighten the area the approximate fraction targets which improves the
estimate by more precisely estimating collisions.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/375ad1a1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/375ad1a1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/375ad1a1

Branch: refs/heads/master
Commit: 375ad1a143020bdf73fe96e292681adcfc0177eb
Parents: 82a1d34
Author: Matthias Boehm <[email protected]>
Authored: Thu Jul 19 23:05:01 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu Jul 19 23:05:01 2018 -0700

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/375ad1a1/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
index daac3df..96f5ab7 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -98,8 +98,9 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                //special case, with hybrid exact and approximate output
                else if(h1.cNnz1e!=null && h2.rNnz1e != null) {
                        //note: normally h1.getRows()*h2.getCols() would define 
mnOut
-                       //but by leveraging the knowledge of empty rows/cols we 
do better
-                       int mnOut = h1.rNonEmpty * h2.cNonEmpty;
+                       //but by leveraging the knowledge of rows/cols w/ <=1 
nnz, we account
+                       //that exact and approximate fractions touch different 
areas
+                       int mnOut = (h1.rNonEmpty-h1.rN1) * 
(h2.cNonEmpty-h2.cN1);
                        double spOutRest = 0;
                        for( int j=0; j<h1.getCols(); j++ ) {
                                //exact fractions, w/o double counting
@@ -137,16 +138,16 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
        }
        
        private static class MatrixHistogram {
+               // count vectors (the histogram)
                private final int[] rNnz;    //nnz per row
                private int[] rNnz1e = null; //nnz per row for cols w/ <= 1 
non-zeros
                private final int[] cNnz;    //nnz per col
                private int[] cNnz1e = null; //nnz per col for rows w/ <= 1 
non-zeros
-               private final int rMaxNnz;   //max nnz per row
-               private final int cMaxNnz;   //max nnz per col
-               private final int rNonEmpty; //number of non-empty rows (an 
empty row has nnz=0)
-               private final int cNonEmpty; //number of non-empty cols (an 
empty col has nnz=0)
-               private final int rNdiv2;    //number of rows with nnz > #cols/2
-               private final int cNdiv2;    //number of cols with nnz > #rows/2
+               // additional summary statistics
+               private final int rMaxNnz, cMaxNnz;     //max nnz per row/row
+               private final int rN1, cN1;             //number of rows/cols 
with nnz=1
+               private final int rNonEmpty, cNonEmpty; //number of non-empty 
rows/cols (w/ empty is nnz=0)
+               private final int rNdiv2, cNdiv2;       //number of rows/cols 
with nnz > #cols/2 and #rows/2
                
                public MatrixHistogram(MatrixBlock in, boolean useExcepts) {
                        // 1) allocate basic synopsis
@@ -184,6 +185,8 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        // 3) compute meta data synopsis
                        rMaxNnz = Arrays.stream(rNnz).max().orElse(0);
                        cMaxNnz =  Arrays.stream(cNnz).max().orElse(0);
+                       rN1 = (int) Arrays.stream(rNnz).filter(item -> item == 
1).count();
+                       cN1 = (int) Arrays.stream(cNnz).filter(item -> item == 
1).count();
                        rNonEmpty = (int) Arrays.stream(rNnz).filter(v-> 
v!=0).count();
                        cNonEmpty = (int) Arrays.stream(cNnz).filter(v-> 
v!=0).count();
                        rNdiv2 = (int) Arrays.stream(rNnz).filter(item -> item 
> getCols()/2).count();
@@ -232,6 +235,7 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        cNnz1e = c1e;
                        rMaxNnz = rmax;
                        cMaxNnz = cmax;
+                       rN1 = cN1 = -1;
                        rNonEmpty = cNonEmpty = -1;
                        rNdiv2 = cNdiv2 = -1;
                }

Reply via email to