Repository: systemml
Updated Branches:
  refs/heads/master 87a0c0bd4 -> 3bba03184


[SYSTEMML-2409] Improved matrix histogram via num non-empty rows/cols

Closes #788.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3bba0318
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3bba0318
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3bba0318

Branch: refs/heads/master
Commit: 3bba0318403bbc8232d4806e151df24951603992
Parents: 87a0c0b
Author: Johanna Sommer <[email protected]>
Authored: Wed Jun 20 18:56:35 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Wed Jun 20 18:56:36 2018 -0700

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 98 ++++++++++----------
 1 file changed, 48 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/3bba0318/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
index 7fc4bb0..fb1a0d8 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -19,6 +19,8 @@
 
 package org.apache.sysml.hops.estim;
 
+import java.util.Arrays;
+
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.DenseBlock;
@@ -118,57 +120,66 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        nnz = (long)(spOut * mnOut);
                }
                
+               //exploit upper bound on nnz based on non-empty rows/cols
+               nnz = (h1.rNonEmpty >= 0 && h2.cNonEmpty >= 0) ?
+                       Math.min((long)h1.rNonEmpty * h2.cNonEmpty, nnz) : nnz;
+               
                //compute final sparsity
                return OptimizerUtils.getSparsity(
                        h1.getRows(), h2.getCols(), nnz);
        }
        
        private static class MatrixHistogram {
-               private final int[] rNnz;    //row nnz counts
-               private int[] rNnz1e = null; //row nnz counts for cols w/ <= 1 
non-zeros
-               private final int[] cNnz;    //column nnz counts
-               private int[] cNnz1e = null; //column nnz counts for rows w/ <= 
1 non-zeros
-               private int rMaxNnz = 0;
-               private int cMaxNnz = 0;
+               private final int[] rNnz;    //nnz per row
+               private int[] rNnz1e = null; //nnz per row for cols w/ <= 1 
non-zeros
+               private final int[] cNnz;    //nnz per col
+               private int[] cNnz1e = null; //nnz per col for rows w/ <= 1 
non-zeros
+               private final int rMaxNnz;   //max nnz per row
+               private final int cMaxNnz;   //max nnz per col
+               private final int rNonEmpty; //number of non-empty rows (an 
empty row has nnz=0)
+               private final int cNonEmpty; //number of non-empty cols (an 
empty col has nnz=0)
                
                public MatrixHistogram(MatrixBlock in, boolean useExcepts) {
-                       //allocate basic synopsis
+                       // 1) allocate basic synopsis
                        rNnz = new int[in.getNumRows()];
                        cNnz = new int[in.getNumColumns()];
-                       if( in.isEmptyBlock(false) )
-                               return;
                        
-                       //compute basic synopsis details
-                       if( in.isInSparseFormat() ) {
-                               SparseBlock sblock = in.getSparseBlock();
-                               for( int i=0; i<in.getNumRows(); i++ ) {
-                                       if( sblock.isEmpty(i) ) continue;
-                                       int alen = sblock.size(i);
-                                       rNnz[i] = alen;
-                                       rMaxNnz = Math.max(rMaxNnz, alen);
-                                       LibMatrixAgg.countAgg(sblock.values(i),
-                                               cNnz, sblock.indexes(i), 
sblock.pos(i), alen);
+                       // 2) compute basic synopsis details
+                       if( !in.isEmpty() ) {
+                               if( in.isInSparseFormat() ) {
+                                       SparseBlock sblock = 
in.getSparseBlock();
+                                       for( int i=0; i<in.getNumRows(); i++ ) {
+                                               if( sblock.isEmpty(i) ) 
continue;
+                                               int alen = sblock.size(i);
+                                               rNnz[i] = alen;
+                                               
LibMatrixAgg.countAgg(sblock.values(i),
+                                                       cNnz, 
sblock.indexes(i), sblock.pos(i), alen);
+                                       }
                                }
-                       }
-                       else {
-                               DenseBlock dblock = in.getDenseBlock();
-                               for( int i=0; i<in.getNumRows(); i++ ) {
-                                       double[] avals = dblock.values(i);
-                                       int lnnz = 0, aix = dblock.pos(i);
-                                       for( int j=0; j<in.getNumColumns(); j++ 
) {
-                                               if( avals[aix+j] != 0 ) {
-                                                       cNnz[j] ++;
-                                                       lnnz ++;
+                               else {
+                                       DenseBlock dblock = in.getDenseBlock();
+                                       for( int i=0; i<in.getNumRows(); i++ ) {
+                                               double[] avals = 
dblock.values(i);
+                                               int lnnz = 0, aix = 
dblock.pos(i);
+                                               for( int j=0; 
j<in.getNumColumns(); j++ ) {
+                                                       if( avals[aix+j] != 0 ) 
{
+                                                               cNnz[j] ++;
+                                                               lnnz ++;
+                                                       }
                                                }
+                                               rNnz[i] = lnnz;
                                        }
-                                       rNnz[i] = lnnz;
-                                       rMaxNnz = Math.max(rMaxNnz, lnnz);
                                }
                        }
-                       cMaxNnz = max(cNnz, 0, in.getNumColumns());
                        
-                       //compute exception details if necessary (optional)
-                       if( useExcepts && (rMaxNnz > 1 || cMaxNnz > 1) ) {
+                       // 3) compute meta data synopsis
+                       rMaxNnz = Arrays.stream(rNnz).max().orElse(0);
+                       cMaxNnz =  Arrays.stream(cNnz).max().orElse(0);
+                       rNonEmpty = (int) Arrays.stream(rNnz).filter(v-> 
v!=0).count();
+                       cNonEmpty = (int) Arrays.stream(cNnz).filter(v-> 
v!=0).count();
+                       
+                       // 4) compute exception details if necessary (optional)
+                       if( useExcepts & !in.isEmpty() && (rMaxNnz > 1 || 
cMaxNnz > 1) ) {
                                rNnz1e = new int[in.getNumRows()];
                                cNnz1e = new int[in.getNumColumns()];
                                
@@ -210,6 +221,7 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        cNnz1e = c1e;
                        rMaxNnz = rmax;
                        cMaxNnz = cmax;
+                       rNonEmpty = cNonEmpty = -1;
                }
                
                public int getRows() {
@@ -222,8 +234,8 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                
                public static MatrixHistogram 
deriveOutputHistogram(MatrixHistogram h1, MatrixHistogram h2, double spOut) {
                        //get input/output nnz for scaling
-                       long nnz1 = sum(h1.rNnz, 0, h1.getRows());
-                       long nnz2 = sum(h2.cNnz, 0, h2.getCols());
+                       long nnz1 = Arrays.stream(h1.rNnz).sum();
+                       long nnz2 = Arrays.stream(h2.cNnz).sum();
                        double nnzOut = spOut * h1.getRows() * h2.getCols();
                        
                        //propagate h1.r and h2.c to output via simple scaling
@@ -243,19 +255,5 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        //construct new histogram object
                        return new MatrixHistogram(rNnz, null, cNnz, null, 
rMaxNnz, cMaxNnz);
                }
-               
-               private static int max(int[] a, int ai, int alen) {
-                       int ret = Integer.MIN_VALUE;
-                       for(int i=ai; i<ai+alen; i++)
-                               ret = Math.max(ret, a[i]);
-                       return ret;
-               }
-               
-               private static long sum(int[] a, int ai, int alen) {
-                       int ret = 0;
-                       for(int i=ai; i<ai+alen; i++)
-                               ret += a[i];
-                       return ret;
-               }
        }
 }

Reply via email to