[SYSTEMML-2461] New estimation util for analyzing self-product NNZs This patch adds a new estimation utility for analyzing the exact output number of non-zeros of self matrix products without the need to materialize the output matrix.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/44a1a67d Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/44a1a67d Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/44a1a67d Branch: refs/heads/master Commit: 44a1a67df134ccc6194ade35f51fe9736717434c Parents: 59a0bb2 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sat Jul 21 01:18:16 2018 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sat Jul 21 01:18:16 2018 -0700 ---------------------------------------------------------------------- .../sysml/hops/estim/EstimationUtils.java | 109 ++++++++++++++ .../functions/estim/SelfProductTest.java | 146 +++++++++++++++++++ .../functions/estim/ZPackageSuite.java | 1 + 3 files changed, 256 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java b/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java new file mode 100644 index 0000000..d4d30bb --- /dev/null +++ b/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.hops.estim; + +import java.util.Arrays; + +import org.apache.sysml.runtime.matrix.data.DenseBlock; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; +import org.apache.sysml.runtime.matrix.data.SparseRowVector; +import org.apache.sysml.runtime.util.UtilFunctions; + +public abstract class EstimationUtils +{ + /** + * This utility function computes the exact output nnz + * of a self matrix product without need to materialize + * the output. + * + * @param m dense or sparse input matrix + * @return exact output number of non-zeros. + */ + public static long getSelfProductOutputNnz(MatrixBlock m1) { + final int m = m1.getNumRows(); + final int n = m1.getNumColumns(); + long retNnz = 0; + + if( m1.isInSparseFormat() ) { + SparseBlock a = m1.getSparseBlock(); + SparseRowVector tmpS = new SparseRowVector(1024); + double[] tmpD = null; + + for( int i=0; i<m; i++ ) { + if( a.isEmpty(i) ) continue; + int alen = a.size(i); + int apos = a.pos(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + + //compute number of aggregated non-zeros for input row + int nnz1 = (int) Math.min(UtilFunctions.computeNnz(a, aix, apos, alen), n); + boolean ldense = nnz1 > n / 128; + + //perform vector-matrix multiply w/ dense or sparse output + if( ldense ) { //init dense tmp row + tmpD = (tmpD == null) ? new double[n] : tmpD; + Arrays.fill(tmpD, 0); + } + else { + tmpS.setSize(0); + } + for( int k=apos; k<apos+alen; k++ ) { + if( a.isEmpty(aix[k]) ) continue; + int blen = a.size(aix[k]); + int bpos = a.pos(aix[k]); + int[] bix = a.indexes(aix[k]); + double aval = avals[k]; + double[] bvals = a.values(aix[k]); + if( ldense ) { //dense aggregation + for( int j=bpos; j<bpos+blen; j++ ) + tmpD[bix[j]] += aval * bvals[j]; + } + else { //sparse aggregation + for( int j=bpos; j<bpos+blen; j++ ) + tmpS.add(bix[j], aval * bvals[j]); + } + } + retNnz += !ldense ? tmpS.size() : + UtilFunctions.computeNnz(tmpD, 0, n); + } + } + else { //dense + DenseBlock a = m1.getDenseBlock(); + double[] tmp = new double[n]; + for( int i=0; i<m; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + Arrays.fill(tmp, 0); //reset + for( int k=aix; k<aix+n; k++ ) { + double aval = avals[k]; + if( aval == 0 ) continue; + double[] bvals = a.values(k); + int bix = a.pos(k); + for( int j=0; j<n; j++ ) + tmp[j] += aval * bvals[bix+j]; + } + retNnz += UtilFunctions.computeNnz(tmp, 0, n); + } + } + return retNnz; + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java new file mode 100644 index 0000000..d609f41 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.estim; + +import org.junit.Test; +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.hops.estim.EstimationUtils; +import org.apache.sysml.hops.estim.EstimatorBasicAvg; +import org.apache.sysml.hops.estim.EstimatorBasicWorst; +import org.apache.sysml.hops.estim.EstimatorBitsetMM; +import org.apache.sysml.hops.estim.EstimatorDensityMap; +import org.apache.sysml.hops.estim.EstimatorMatrixHistogram; +import org.apache.sysml.hops.estim.EstimatorSample; +import org.apache.sysml.hops.estim.SparsityEstimator; +import org.apache.sysml.runtime.instructions.InstructionUtils; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.utils.TestUtils; + +public class SelfProductTest extends AutomatedTestBase +{ + private final static int m = 2500; + private final static double sparsity1 = 0.0001; + private final static double sparsity2 = 0.000001; + private final static double eps1 = 0.05; + private final static double eps2 = 1e-4; + private final static double eps3 = 0; + + + @Override + public void setUp() { + //do nothing + } + + @Test + public void testBasicAvgCase1() { + runSparsityEstimateTest(new EstimatorBasicAvg(), m, sparsity1); + } + + @Test + public void testBasicAvgCase2() { + runSparsityEstimateTest(new EstimatorBasicAvg(), m, sparsity2); + } + + @Test + public void testDensityMapCase1() { + runSparsityEstimateTest(new EstimatorDensityMap(), m, sparsity1); + } + + @Test + public void testDensityMapCase2() { + runSparsityEstimateTest(new EstimatorDensityMap(), m, sparsity2); + } + + @Test + public void testDensityMap7Case1() { + runSparsityEstimateTest(new EstimatorDensityMap(7), m, sparsity1); + } + + @Test + public void testDensityMap7Case2() { + runSparsityEstimateTest(new EstimatorDensityMap(7), m, sparsity2); + } + + @Test + public void testBitsetMatrixCase1() { + runSparsityEstimateTest(new EstimatorBitsetMM(), m, sparsity1); + } + + @Test + public void testBitsetMatrixCase2() { + runSparsityEstimateTest(new EstimatorBitsetMM(), m, sparsity2); + } + + @Test + public void testMatrixHistogramCase1() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(false), m, sparsity1); + } + + @Test + public void testMatrixHistogramCase2() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(false), m, sparsity2); + } + + @Test + public void testMatrixHistogramExceptCase1() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(true), m, sparsity1); + } + + @Test + public void testMatrixHistogramExceptCase2() { + runSparsityEstimateTest(new EstimatorMatrixHistogram(true), m, sparsity2); + } + + @Test + public void testSamplingDefCase1() { + runSparsityEstimateTest(new EstimatorSample(), m, sparsity1); + } + + @Test + public void testSamplingDefCase2() { + runSparsityEstimateTest(new EstimatorSample(), m, sparsity2); + } + + @Test + public void testSampling20Case1() { + runSparsityEstimateTest(new EstimatorSample(0.2), m, sparsity1); + } + + @Test + public void testSampling20Case2() { + runSparsityEstimateTest(new EstimatorSample(0.2), m, sparsity2); + } + + private void runSparsityEstimateTest(SparsityEstimator estim, int n, double sp) { + MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp, 1, 1, "uniform", 3); + MatrixBlock m3 = m1.aggregateBinaryOperations(m1, m1, + new MatrixBlock(), InstructionUtils.getMatMultOperator(1)); + double spExact = OptimizerUtils.getSparsity(m, m, + EstimationUtils.getSelfProductOutputNnz(m1)); + + //compare estimated and real sparsity + double est = estim.estim(m1, m1); + TestUtils.compareScalars(est, m3.getSparsity(), + (estim instanceof EstimatorBitsetMM) ? eps3 : //exact + (estim instanceof EstimatorBasicWorst) ? eps1 : eps2); + TestUtils.compareScalars(m3.getSparsity(), spExact, eps3); + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java index 82d1891..2842905 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java @@ -27,6 +27,7 @@ import org.junit.runners.Suite; @RunWith(Suite.class) @Suite.SuiteClasses({ OuterProductTest.class, + SelfProductTest.class, SquaredProductChainTest.class, SquaredProductTest.class, })