Repository: systemml
Updated Branches:
  refs/heads/master 382f847de -> b366c0f89


[SYSTEMML-2296] Performance MNC matrix histogram creation

This improves the creation of matrix histograms in the MNC sparsity
estimator. Specifically this includes (1) fused scans of count vectors
to obtain the summary statistics, (2) a better handling of dense basic
scans, and (3) conditional stores for extension vectors. 

When constructing 4 histograms (1 pair w/o and 1 pair with extension
vectors) for P %*% W, where P:= 58825360 x 2519371 (nnz 58825360) and W
:= 2519371 x 300 (nnz 755810998), this patch improved end-to-end
performance from 14.1s to 10.5s


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b366c0f8
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b366c0f8
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b366c0f8

Branch: refs/heads/master
Commit: b366c0f8988f35faee2ea30b5a97ad04c29bda9f
Parents: 382f847
Author: Matthias Boehm <[email protected]>
Authored: Fri Aug 3 20:42:40 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Aug 3 20:42:40 2018 -0700

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 97 +++++++++++---------
 .../runtime/matrix/data/DenseBlockDRB.java      |  7 +-
 .../sysml/runtime/matrix/data/LibMatrixAgg.java | 18 ++++
 3 files changed, 71 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
index 270d198..6b4c898 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -201,50 +201,44 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                
                public MatrixHistogram(MatrixBlock in, boolean useExcepts) {
                        // 1) allocate basic synopsis
+                       final int m = in.getNumRows();
+                       final int n = in.getNumColumns();
                        rNnz = new int[in.getNumRows()];
                        cNnz = new int[in.getNumColumns()];
-                       fullDiag = in.getNumRows() == in.getNonZeros();
+                       fullDiag = in.getNumRows() == in.getNonZeros()
+                               && in.getNumRows() == in.getNumColumns();
                        
                        // 2) compute basic synopsis details
                        if( !in.isEmpty() ) {
                                if( in.isInSparseFormat() ) {
-                                       SparseBlock sblock = 
in.getSparseBlock();
-                                       for( int i=0; i<in.getNumRows(); i++ ) {
-                                               if( sblock.isEmpty(i) ) 
continue;
-                                               int apos = sblock.pos(i);
-                                               int alen = sblock.size(i);
-                                               int[] aix = sblock.indexes(i);
+                                       SparseBlock a = in.getSparseBlock();
+                                       for( int i=0; i<m; i++ ) {
+                                               if( a.isEmpty(i) ) continue;
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
                                                rNnz[i] = alen;
-                                               
LibMatrixAgg.countAgg(sblock.values(i), cNnz, aix, apos, alen);
+                                               
LibMatrixAgg.countAgg(a.values(i), cNnz, aix, apos, alen);
                                                fullDiag &= aix[apos] == i;
                                        }
                                }
                                else {
-                                       DenseBlock dblock = in.getDenseBlock();
-                                       for( int i=0; i<in.getNumRows(); i++ ) {
-                                               double[] avals = 
dblock.values(i);
-                                               int lnnz = 0, aix = 
dblock.pos(i);
-                                               for( int j=0; 
j<in.getNumColumns(); j++ ) {
-                                                       if( avals[aix+j] != 0 ) 
{
-                                                               fullDiag &= (i 
== j);
-                                                               cNnz[j] ++;
-                                                               lnnz ++;
-                                                       }
-                                               }
-                                               rNnz[i] = lnnz;
+                                       DenseBlock a = in.getDenseBlock();
+                                       for( int i=0; i<m; i++ ) {
+                                               rNnz[i] = a.countNonZeros(i);
+                                               
LibMatrixAgg.countAgg(a.values(i), cNnz, a.pos(i), n);
+                                               fullDiag &= (rNnz[i]==1 && n>i 
&& a.get(i, i)!=0);
                                        }
                                }
                        }
                        
                        // 3) compute meta data synopsis
-                       rMaxNnz = Arrays.stream(rNnz).max().orElse(0);
-                       cMaxNnz =  Arrays.stream(cNnz).max().orElse(0);
-                       rN1 = (int) Arrays.stream(rNnz).filter(item -> item == 
1).count();
-                       cN1 = (int) Arrays.stream(cNnz).filter(item -> item == 
1).count();
-                       rNonEmpty = (int) Arrays.stream(rNnz).filter(v-> 
v!=0).count();
-                       cNonEmpty = (int) Arrays.stream(cNnz).filter(v-> 
v!=0).count();
-                       rNdiv2 = (int) Arrays.stream(rNnz).filter(item -> item 
> getCols()/2).count();
-                       cNdiv2 = (int) Arrays.stream(cNnz).filter(item -> item 
> getRows()/2).count();
+                       int[] rSummary = deriveSummaryStatistics(rNnz, 
getCols());
+                       int[] cSummary = deriveSummaryStatistics(cNnz, 
getRows());
+                       rMaxNnz = rSummary[0]; cMaxNnz = cSummary[0];
+                       rN1 = rSummary[1]; cN1 = cSummary[1];
+                       rNonEmpty = rSummary[2]; cNonEmpty = cSummary[2];
+                       rNdiv2 = rSummary[3]; cNdiv2 = cSummary[3];
                        
                        // 4) compute exception details if necessary (optional)
                        if( useExcepts & !in.isEmpty() && (rMaxNnz > 1 || 
cMaxNnz > 1) ) {
@@ -252,29 +246,29 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                                cNnz1e = new int[in.getNumColumns()];
                                
                                if( in.isInSparseFormat() ) {
-                                       SparseBlock sblock = 
in.getSparseBlock();
-                                       for( int i=0; i<in.getNumRows(); i++ ) {
-                                               if( sblock.isEmpty(i) ) 
continue;
-                                               int alen = sblock.size(i);
-                                               int apos = sblock.pos(i);
-                                               int[] aix = sblock.indexes(i);
+                                       SparseBlock a = in.getSparseBlock();
+                                       for( int i=0; i<m; i++ ) {
+                                               if( a.isEmpty(i) ) continue;
+                                               int alen = a.size(i);
+                                               int apos = a.pos(i);
+                                               int[] aix = a.indexes(i);
                                                for( int k=apos; k<apos+alen; 
k++ )
-                                                       rNnz1e[i] += 
cNnz[aix[k]] <= 1 ? 1 : 0;
-                                               if( alen <= 1 )
-                                                       for( int k=apos; 
k<apos+alen; k++ )
-                                                               
cNnz1e[aix[k]]++;
+                                                       if( cNnz[aix[k]] <= 1 )
+                                                               rNnz1e[i]++;
+                                               if( alen == 1 )
+                                                       cNnz1e[aix[apos]]++;
                                        }
                                }
                                else {
-                                       DenseBlock dblock = in.getDenseBlock();
-                                       for( int i=0; i<in.getNumRows(); i++ ) {
-                                               double[] avals = 
dblock.values(i);
-                                               int aix = dblock.pos(i);
+                                       DenseBlock a = in.getDenseBlock();
+                                       for( int i=0; i<m; i++ ) {
+                                               double[] avals = a.values(i);
+                                               int aix = a.pos(i);
                                                boolean rNnzlte1 = rNnz[i] <= 1;
-                                               for( int j=0; 
j<in.getNumColumns(); j++ ) {
+                                               for( int j=0; j<n; j++ ) {
                                                        if( avals[aix + j] != 0 
) {
-                                                               rNnz1e[i] += 
cNnz[j] <= 1 ? 1 : 0;
-                                                               cNnz1e[j] +=  
rNnzlte1 ? 1 : 0;
+                                                               if( cNnz[j] <= 
1 ) rNnz1e[i]++;
+                                                               if( rNnzlte1 ) 
cNnz1e[j]++;
                                                        }
                                                }
                                        }
@@ -343,5 +337,18 @@ public class EstimatorMatrixHistogram extends 
SparsityEstimator
                        double randf = rand.nextDouble(); //uniform [0,1)
                        return (int)((f > randf) ? temp+1 : temp);
                }
+               
+               private static int[] deriveSummaryStatistics(int[] counts, int 
N) {
+                       int max = Integer.MIN_VALUE, N2 = N/2;
+                       int cntN1 = 0, cntNeq0 = 0, cntNdiv2 = 0;
+                       for(int i=0; i<counts.length; i++) {
+                               final int cnti = counts[i];
+                               max = Math.max(max, cnti);
+                               cntN1 += (cnti == 1) ? 1 : 0;
+                               cntNeq0 += (cnti != 0) ? 1 : 0;
+                               cntNdiv2 += (cnti > N2) ? 1 : 0;
+                       }
+                       return new int[]{max, cntN1, cntNeq0, cntNdiv2};
+               }
        }
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
index 90fbcfe..72dff0e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
@@ -114,12 +114,7 @@ public class DenseBlockDRB extends DenseBlock
 
        @Override
        public long countNonZeros() {
-               final int len = rlen * clen;
-               double[] a = data;
-               int nnz = 0;
-               for(int i=0; i<len; i++)
-                       nnz += (a[i]!=0) ? 1 : 0;
-               return nnz;
+               return UtilFunctions.computeNnz(data, 0, rlen*clen);
        }
        
        @Override

http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index 5ae7eda..e3e9dd7 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -3152,6 +3152,24 @@ public class LibMatrixAgg
                }
        }
        
+       public static void countAgg( double[] a, int[] c, int ai, final int len 
) {
+               final int bn = len%8;
+               //compute rest, not aligned to 8-block
+               for( int i=0; i<bn; i++ )
+                       c[i] += a[ai+i]!=0 ? 1 : 0;
+               //unrolled 8-block (for better instruction level parallelism)
+               for( int i=bn; i<len; i+=8 ) {
+                       c[i+0] += a[ai+i+0]!=0 ? 1 : 0;
+                       c[i+1] += a[ai+i+1]!=0 ? 1 : 0;
+                       c[i+2] += a[ai+i+2]!=0 ? 1 : 0;
+                       c[i+3] += a[ai+i+3]!=0 ? 1 : 0;
+                       c[i+4] += a[ai+i+4]!=0 ? 1 : 0;
+                       c[i+5] += a[ai+i+5]!=0 ? 1 : 0;
+                       c[i+6] += a[ai+i+6]!=0 ? 1 : 0;
+                       c[i+7] += a[ai+i+7]!=0 ? 1 : 0;
+               }
+       }
+       
        private static void countDisAgg( double[] a, double[] c, int[] aix, int 
ai, final int ci, final int len ) {
                final int bn = len%8;
                //compute rest, not aligned to 8-block

Reply via email to