Repository: systemml Updated Branches: refs/heads/master aa253eb7e -> 78bfb7712
[SYSTEMML-2296] New sparsity estimator based on matrix histograms This patch introduces a new sparsity estimator based on matrix histograms, i.e., a synopsis that stores the number of non-zeros per row/column for a matrix. This extremely simply yet effective synopsis allows to exploit structural properties of permutation and selection matrices. For example, if all rows have <=1 non-zeros the exact number of non-zeros in the output can be computed as the simple dot product of lhs columns counts times rhs row counts. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/78bfb771 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/78bfb771 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/78bfb771 Branch: refs/heads/master Commit: 78bfb77120e5952db42e5471b43c7d5d5335f455 Parents: aa253eb Author: Matthias Boehm <[email protected]> Authored: Wed May 2 14:35:21 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Wed May 2 14:35:21 2018 -0700 ---------------------------------------------------------------------- .../hops/estim/EstimatorMatrixHistogram.java | 133 +++++++++++++++++++ .../sysml/runtime/matrix/data/LibMatrixAgg.java | 2 +- .../functions/estim/OuterProductTest.java | 11 ++ .../functions/estim/SquaredProductTest.java | 11 ++ 4 files changed, 156 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java new file mode 100644 index 0000000..6fbb5f0 --- /dev/null +++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.hops.estim; + +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; +import org.apache.sysml.runtime.matrix.data.DenseBlock; +import org.apache.sysml.runtime.matrix.data.LibMatrixAgg; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; + +/** + * This estimator implements a remarkably simple yet effective + * approach for incorporating structural properties into sparsity + * estimation. The key idea is to maintain row and column nnz per + * matrix, along with additional meta data. + */ +public class EstimatorMatrixHistogram extends SparsityEstimator +{ + @Override + public double estim(MMNode root) { + throw new DMLRuntimeException("Estimation of " + + "intermediate matrix histograms not supported yet."); + } + + @Override + public double estim(MatrixBlock m1, MatrixBlock m2) { + MatrixHistogram h1 = new MatrixHistogram(m1); + MatrixHistogram h2 = new MatrixHistogram(m2); + return estimIntern(h1, h2); + } + + @Override + public double estim(MatrixCharacteristics mc1, MatrixCharacteristics mc2) { + LOG.warn("Meta-data-only estimates not supported in " + + "EstimatorMatrixHistogram, falling back to EstimatorBasicAvg."); + return new EstimatorBasicAvg().estim(mc1, mc2); + } + + private double estimIntern(MatrixHistogram h1, MatrixHistogram h2) { + long nnz = 0; + //special case, with exact sparsity estimate, where the dot product + //dot(h1.cNnz,h2rNnz) gives the exact number of non-zeros in the output + if( h1.rMaxNnz <= 1 ) { + for( int j=0; j<h1.getCols(); j++ ) + nnz += h1.cNnz[j] * h2.rNnz[j]; + } + //general case with approximate output + else { + int mnOut = h1.getRows()*h2.getCols(); + double spOut = 0; + for( int j=0; j<h1.getCols(); j++ ) { + double lsp = (double) h1.cNnz[j] * h2.rNnz[j] / mnOut; + spOut = spOut + lsp - spOut*lsp; + } + nnz = (long)(spOut * mnOut); + } + + //compute final sparsity + return OptimizerUtils.getSparsity( + h1.getRows(), h2.getCols(), nnz); + } + + private static class MatrixHistogram { + private final int[] rNnz; + private final int[] cNnz; + private int rMaxNnz = 0; + private int cMaxNnz = 0; + + public MatrixHistogram(MatrixBlock in) { + rNnz = new int[in.getNumRows()]; + cNnz = new int[in.getNumColumns()]; + if( in.isEmptyBlock(false) ) + return; + + if( in.isInSparseFormat() ) { + SparseBlock sblock = in.getSparseBlock(); + for( int i=0; i<in.getNumRows(); i++ ) { + if( sblock.isEmpty(i) ) continue; + int alen = sblock.size(i); + rNnz[i] = alen; + rMaxNnz = Math.max(rMaxNnz, alen); + LibMatrixAgg.countAgg(sblock.values(i), + cNnz, sblock.indexes(i), sblock.pos(i), alen); + } + } + else { + DenseBlock dblock = in.getDenseBlock(); + for( int i=0; i<in.getNumRows(); i++ ) { + double[] avals = dblock.values(i); + int lnnz = 0, aix = dblock.pos(i); + for( int j=0; j<in.getNumColumns(); j++ ) { + if( avals[aix+j] != 0 ) { + cNnz[j] ++; + lnnz ++; + } + } + rNnz[i] = lnnz; + rMaxNnz = Math.max(rMaxNnz, lnnz); + } + } + + for(int j=0; j<in.getNumColumns(); j++) + cMaxNnz = Math.max(cMaxNnz, cNnz[j]); + } + + public int getRows() { + return rNnz.length; + } + + public int getCols() { + return cNnz.length; + } + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index 5dfddbd..db73ce2 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -3098,7 +3098,7 @@ public class LibMatrixAgg return minindex; } - private static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) { + public static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) { final int bn = len%8; //compute rest, not aligned to 8-block for( int i=ai; i<ai+bn; i++ ) http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java index 90f316c..f58a0c4 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java @@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg; import org.apache.sysml.hops.estim.EstimatorBasicWorst; import org.apache.sysml.hops.estim.EstimatorBitsetMM; import org.apache.sysml.hops.estim.EstimatorDensityMap; +import org.apache.sysml.hops.estim.EstimatorMatrixHistogram; import org.apache.sysml.hops.estim.SparsityEstimator; import org.apache.sysml.runtime.instructions.InstructionUtils; import org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ -97,6 +98,16 @@ public class OuterProductTest extends AutomatedTestBase runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2); } + @Test + public void testMatrixHistogramCase1() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1); + } + + @Test + public void testMatrixHistogramCase2() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2); + } + private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3); http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java index 99b1b3b..4cbd06c 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java @@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg; import org.apache.sysml.hops.estim.EstimatorBasicWorst; import org.apache.sysml.hops.estim.EstimatorBitsetMM; import org.apache.sysml.hops.estim.EstimatorDensityMap; +import org.apache.sysml.hops.estim.EstimatorMatrixHistogram; import org.apache.sysml.hops.estim.SparsityEstimator; import org.apache.sysml.runtime.instructions.InstructionUtils; import org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ -102,6 +103,16 @@ public class SquaredProductTest extends AutomatedTestBase runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2); } + @Test + public void testMatrixHistogramCase1() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1); + } + + @Test + public void testMatrixHistogramCase2() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2); + } + private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) { MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3); MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3);
