Repository: systemml Updated Branches: refs/heads/master 382f847de -> b366c0f89
[SYSTEMML-2296] Performance MNC matrix histogram creation This improves the creation of matrix histograms in the MNC sparsity estimator. Specifically this includes (1) fused scans of count vectors to obtain the summary statistics, (2) a better handling of dense basic scans, and (3) conditional stores for extension vectors. When constructing 4 histograms (1 pair w/o and 1 pair with extension vectors) for P %*% W, where P:= 58825360 x 2519371 (nnz 58825360) and W := 2519371 x 300 (nnz 755810998), this patch improved end-to-end performance from 14.1s to 10.5s Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b366c0f8 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b366c0f8 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b366c0f8 Branch: refs/heads/master Commit: b366c0f8988f35faee2ea30b5a97ad04c29bda9f Parents: 382f847 Author: Matthias Boehm <[email protected]> Authored: Fri Aug 3 20:42:40 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Aug 3 20:42:40 2018 -0700 ---------------------------------------------------------------------- .../hops/estim/EstimatorMatrixHistogram.java | 97 +++++++++++--------- .../runtime/matrix/data/DenseBlockDRB.java | 7 +- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 18 ++++ 3 files changed, 71 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java index 270d198..6b4c898 100644 --- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java +++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java @@ -201,50 +201,44 @@ public class EstimatorMatrixHistogram extends SparsityEstimator public MatrixHistogram(MatrixBlock in, boolean useExcepts) { // 1) allocate basic synopsis + final int m = in.getNumRows(); + final int n = in.getNumColumns(); rNnz = new int[in.getNumRows()]; cNnz = new int[in.getNumColumns()]; - fullDiag = in.getNumRows() == in.getNonZeros(); + fullDiag = in.getNumRows() == in.getNonZeros() + && in.getNumRows() == in.getNumColumns(); // 2) compute basic synopsis details if( !in.isEmpty() ) { if( in.isInSparseFormat() ) { - SparseBlock sblock = in.getSparseBlock(); - for( int i=0; i<in.getNumRows(); i++ ) { - if( sblock.isEmpty(i) ) continue; - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); + SparseBlock a = in.getSparseBlock(); + for( int i=0; i<m; i++ ) { + if( a.isEmpty(i) ) continue; + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); rNnz[i] = alen; - LibMatrixAgg.countAgg(sblock.values(i), cNnz, aix, apos, alen); + LibMatrixAgg.countAgg(a.values(i), cNnz, aix, apos, alen); fullDiag &= aix[apos] == i; } } else { - DenseBlock dblock = in.getDenseBlock(); - for( int i=0; i<in.getNumRows(); i++ ) { - double[] avals = dblock.values(i); - int lnnz = 0, aix = dblock.pos(i); - for( int j=0; j<in.getNumColumns(); j++ ) { - if( avals[aix+j] != 0 ) { - fullDiag &= (i == j); - cNnz[j] ++; - lnnz ++; - } - } - rNnz[i] = lnnz; + DenseBlock a = in.getDenseBlock(); + for( int i=0; i<m; i++ ) { + rNnz[i] = a.countNonZeros(i); + LibMatrixAgg.countAgg(a.values(i), cNnz, a.pos(i), n); + fullDiag &= (rNnz[i]==1 && n>i && a.get(i, i)!=0); } } } // 3) compute meta data synopsis - rMaxNnz = Arrays.stream(rNnz).max().orElse(0); - cMaxNnz = Arrays.stream(cNnz).max().orElse(0); - rN1 = (int) Arrays.stream(rNnz).filter(item -> item == 1).count(); - cN1 = (int) Arrays.stream(cNnz).filter(item -> item == 1).count(); - rNonEmpty = (int) Arrays.stream(rNnz).filter(v-> v!=0).count(); - cNonEmpty = (int) Arrays.stream(cNnz).filter(v-> v!=0).count(); - rNdiv2 = (int) Arrays.stream(rNnz).filter(item -> item > getCols()/2).count(); - cNdiv2 = (int) Arrays.stream(cNnz).filter(item -> item > getRows()/2).count(); + int[] rSummary = deriveSummaryStatistics(rNnz, getCols()); + int[] cSummary = deriveSummaryStatistics(cNnz, getRows()); + rMaxNnz = rSummary[0]; cMaxNnz = cSummary[0]; + rN1 = rSummary[1]; cN1 = cSummary[1]; + rNonEmpty = rSummary[2]; cNonEmpty = cSummary[2]; + rNdiv2 = rSummary[3]; cNdiv2 = cSummary[3]; // 4) compute exception details if necessary (optional) if( useExcepts & !in.isEmpty() && (rMaxNnz > 1 || cMaxNnz > 1) ) { @@ -252,29 +246,29 @@ public class EstimatorMatrixHistogram extends SparsityEstimator cNnz1e = new int[in.getNumColumns()]; if( in.isInSparseFormat() ) { - SparseBlock sblock = in.getSparseBlock(); - for( int i=0; i<in.getNumRows(); i++ ) { - if( sblock.isEmpty(i) ) continue; - int alen = sblock.size(i); - int apos = sblock.pos(i); - int[] aix = sblock.indexes(i); + SparseBlock a = in.getSparseBlock(); + for( int i=0; i<m; i++ ) { + if( a.isEmpty(i) ) continue; + int alen = a.size(i); + int apos = a.pos(i); + int[] aix = a.indexes(i); for( int k=apos; k<apos+alen; k++ ) - rNnz1e[i] += cNnz[aix[k]] <= 1 ? 1 : 0; - if( alen <= 1 ) - for( int k=apos; k<apos+alen; k++ ) - cNnz1e[aix[k]]++; + if( cNnz[aix[k]] <= 1 ) + rNnz1e[i]++; + if( alen == 1 ) + cNnz1e[aix[apos]]++; } } else { - DenseBlock dblock = in.getDenseBlock(); - for( int i=0; i<in.getNumRows(); i++ ) { - double[] avals = dblock.values(i); - int aix = dblock.pos(i); + DenseBlock a = in.getDenseBlock(); + for( int i=0; i<m; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); boolean rNnzlte1 = rNnz[i] <= 1; - for( int j=0; j<in.getNumColumns(); j++ ) { + for( int j=0; j<n; j++ ) { if( avals[aix + j] != 0 ) { - rNnz1e[i] += cNnz[j] <= 1 ? 1 : 0; - cNnz1e[j] += rNnzlte1 ? 1 : 0; + if( cNnz[j] <= 1 ) rNnz1e[i]++; + if( rNnzlte1 ) cNnz1e[j]++; } } } @@ -343,5 +337,18 @@ public class EstimatorMatrixHistogram extends SparsityEstimator double randf = rand.nextDouble(); //uniform [0,1) return (int)((f > randf) ? temp+1 : temp); } + + private static int[] deriveSummaryStatistics(int[] counts, int N) { + int max = Integer.MIN_VALUE, N2 = N/2; + int cntN1 = 0, cntNeq0 = 0, cntNdiv2 = 0; + for(int i=0; i<counts.length; i++) { + final int cnti = counts[i]; + max = Math.max(max, cnti); + cntN1 += (cnti == 1) ? 1 : 0; + cntNeq0 += (cnti != 0) ? 1 : 0; + cntNdiv2 += (cnti > N2) ? 1 : 0; + } + return new int[]{max, cntN1, cntNeq0, cntNdiv2}; + } } } http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java index 90fbcfe..72dff0e 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java @@ -114,12 +114,7 @@ public class DenseBlockDRB extends DenseBlock @Override public long countNonZeros() { - final int len = rlen * clen; - double[] a = data; - int nnz = 0; - for(int i=0; i<len; i++) - nnz += (a[i]!=0) ? 1 : 0; - return nnz; + return UtilFunctions.computeNnz(data, 0, rlen*clen); } @Override http://git-wip-us.apache.org/repos/asf/systemml/blob/b366c0f8/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index 5ae7eda..e3e9dd7 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -3152,6 +3152,24 @@ public class LibMatrixAgg } } + public static void countAgg( double[] a, int[] c, int ai, final int len ) { + final int bn = len%8; + //compute rest, not aligned to 8-block + for( int i=0; i<bn; i++ ) + c[i] += a[ai+i]!=0 ? 1 : 0; + //unrolled 8-block (for better instruction level parallelism) + for( int i=bn; i<len; i+=8 ) { + c[i+0] += a[ai+i+0]!=0 ? 1 : 0; + c[i+1] += a[ai+i+1]!=0 ? 1 : 0; + c[i+2] += a[ai+i+2]!=0 ? 1 : 0; + c[i+3] += a[ai+i+3]!=0 ? 1 : 0; + c[i+4] += a[ai+i+4]!=0 ? 1 : 0; + c[i+5] += a[ai+i+5]!=0 ? 1 : 0; + c[i+6] += a[ai+i+6]!=0 ? 1 : 0; + c[i+7] += a[ai+i+7]!=0 ? 1 : 0; + } + } + private static void countDisAgg( double[] a, double[] c, int[] aix, int ai, final int ci, final int len ) { final int bn = len%8; //compute rest, not aligned to 8-block
