[SYSTEMML-2461] New estimation util for analyzing self-product NNZs

This patch adds a new estimation utility for analyzing the exact output
number of non-zeros of self matrix products without the need to
materialize the output matrix.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/44a1a67d
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/44a1a67d
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/44a1a67d

Branch: refs/heads/master
Commit: 44a1a67df134ccc6194ade35f51fe9736717434c
Parents: 59a0bb2
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sat Jul 21 01:18:16 2018 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sat Jul 21 01:18:16 2018 -0700

----------------------------------------------------------------------
 .../sysml/hops/estim/EstimationUtils.java       | 109 ++++++++++++++
 .../functions/estim/SelfProductTest.java        | 146 +++++++++++++++++++
 .../functions/estim/ZPackageSuite.java          |   1 +
 3 files changed, 256 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java 
b/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java
new file mode 100644
index 0000000..d4d30bb
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimationUtils.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import java.util.Arrays;
+
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseRowVector;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+public abstract class EstimationUtils 
+{
+       /**
+        * This utility function computes the exact output nnz
+        * of a self matrix product without need to materialize
+        * the output.
+        * 
+        * @param m dense or sparse input matrix
+        * @return exact output number of non-zeros.
+        */
+       public static long getSelfProductOutputNnz(MatrixBlock m1) {
+               final int m = m1.getNumRows();
+               final int n = m1.getNumColumns();
+               long retNnz = 0;
+               
+               if( m1.isInSparseFormat() ) {
+                       SparseBlock a = m1.getSparseBlock();
+                       SparseRowVector tmpS = new SparseRowVector(1024);
+                       double[] tmpD = null;
+                       
+                       for( int i=0; i<m; i++ ) {
+                               if( a.isEmpty(i) ) continue;
+                               int alen = a.size(i);
+                               int apos = a.pos(i);
+                               int[] aix = a.indexes(i);
+                               double[] avals = a.values(i);
+                               
+                               //compute number of aggregated non-zeros for 
input row
+                               int nnz1 = (int) 
Math.min(UtilFunctions.computeNnz(a, aix, apos, alen), n);
+                               boolean ldense = nnz1 > n / 128;
+                               
+                               //perform vector-matrix multiply w/ dense or 
sparse output
+                               if( ldense ) { //init dense tmp row
+                                       tmpD = (tmpD == null) ? new double[n] : 
tmpD;
+                                       Arrays.fill(tmpD, 0);
+                               }
+                               else {
+                                       tmpS.setSize(0);
+                               }
+                               for( int k=apos; k<apos+alen; k++ ) {
+                                       if( a.isEmpty(aix[k]) ) continue;
+                                       int blen = a.size(aix[k]);
+                                       int bpos = a.pos(aix[k]);
+                                       int[] bix = a.indexes(aix[k]);
+                                       double aval = avals[k];
+                                       double[] bvals = a.values(aix[k]);
+                                       if( ldense ) { //dense aggregation
+                                               for( int j=bpos; j<bpos+blen; 
j++ )
+                                                       tmpD[bix[j]] += aval * 
bvals[j];
+                                       }
+                                       else { //sparse aggregation
+                                               for( int j=bpos; j<bpos+blen; 
j++ )
+                                                       tmpS.add(bix[j], aval * 
bvals[j]);
+                                       }
+                               }
+                               retNnz += !ldense ? tmpS.size() :
+                                       UtilFunctions.computeNnz(tmpD, 0, n);
+                       }
+               }
+               else { //dense
+                       DenseBlock a = m1.getDenseBlock();
+                       double[] tmp = new double[n];
+                       for( int i=0; i<m; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               Arrays.fill(tmp, 0); //reset
+                               for( int k=aix; k<aix+n; k++ ) {
+                                       double aval = avals[k];
+                                       if( aval == 0 ) continue;
+                                       double[] bvals = a.values(k);
+                                       int bix = a.pos(k);
+                                       for( int j=0; j<n; j++ )
+                                               tmp[j] += aval * bvals[bix+j];
+                               }
+                               retNnz += UtilFunctions.computeNnz(tmp, 0, n);
+                       }
+               }
+               return retNnz;
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java
new file mode 100644
index 0000000..d609f41
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/estim/SelfProductTest.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.estim;
+
+import org.junit.Test;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.hops.estim.EstimationUtils;
+import org.apache.sysml.hops.estim.EstimatorBasicAvg;
+import org.apache.sysml.hops.estim.EstimatorBasicWorst;
+import org.apache.sysml.hops.estim.EstimatorBitsetMM;
+import org.apache.sysml.hops.estim.EstimatorDensityMap;
+import org.apache.sysml.hops.estim.EstimatorMatrixHistogram;
+import org.apache.sysml.hops.estim.EstimatorSample;
+import org.apache.sysml.hops.estim.SparsityEstimator;
+import org.apache.sysml.runtime.instructions.InstructionUtils;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class SelfProductTest extends AutomatedTestBase 
+{
+       private final static int m = 2500;
+       private final static double sparsity1 = 0.0001;
+       private final static double sparsity2 = 0.000001;
+       private final static double eps1 = 0.05;
+       private final static double eps2 = 1e-4;
+       private final static double eps3 = 0;
+       
+       
+       @Override
+       public void setUp() {
+               //do  nothing
+       }
+       
+       @Test
+       public void testBasicAvgCase1() {
+               runSparsityEstimateTest(new EstimatorBasicAvg(), m, sparsity1);
+       }
+       
+       @Test
+       public void testBasicAvgCase2() {
+               runSparsityEstimateTest(new EstimatorBasicAvg(), m, sparsity2);
+       }
+       
+       @Test
+       public void testDensityMapCase1() {
+               runSparsityEstimateTest(new EstimatorDensityMap(), m, 
sparsity1);
+       }
+       
+       @Test
+       public void testDensityMapCase2() {
+               runSparsityEstimateTest(new EstimatorDensityMap(), m, 
sparsity2);
+       }
+       
+       @Test
+       public void testDensityMap7Case1() {
+               runSparsityEstimateTest(new EstimatorDensityMap(7), m, 
sparsity1);
+       }
+       
+       @Test
+       public void testDensityMap7Case2() {
+               runSparsityEstimateTest(new EstimatorDensityMap(7), m, 
sparsity2);
+       }
+       
+       @Test
+       public void testBitsetMatrixCase1() {
+               runSparsityEstimateTest(new EstimatorBitsetMM(), m, sparsity1);
+       }
+       
+       @Test
+       public void testBitsetMatrixCase2() {
+               runSparsityEstimateTest(new EstimatorBitsetMM(), m, sparsity2);
+       }
+       
+       @Test
+       public void testMatrixHistogramCase1() {
+               runSparsityEstimateTest(new EstimatorMatrixHistogram(false), m, 
sparsity1);
+       }
+       
+       @Test
+       public void testMatrixHistogramCase2() {
+               runSparsityEstimateTest(new EstimatorMatrixHistogram(false), m, 
sparsity2);
+       }
+       
+       @Test
+       public void testMatrixHistogramExceptCase1() {
+               runSparsityEstimateTest(new EstimatorMatrixHistogram(true), m, 
sparsity1);
+       }
+       
+       @Test
+       public void testMatrixHistogramExceptCase2() {
+               runSparsityEstimateTest(new EstimatorMatrixHistogram(true), m, 
sparsity2);
+       }
+       
+       @Test
+       public void testSamplingDefCase1() {
+               runSparsityEstimateTest(new EstimatorSample(), m, sparsity1);
+       }
+       
+       @Test
+       public void testSamplingDefCase2() {
+               runSparsityEstimateTest(new EstimatorSample(), m, sparsity2);
+       }
+       
+       @Test
+       public void testSampling20Case1() {
+               runSparsityEstimateTest(new EstimatorSample(0.2), m, sparsity1);
+       }
+       
+       @Test
+       public void testSampling20Case2() {
+               runSparsityEstimateTest(new EstimatorSample(0.2), m, sparsity2);
+       }
+       
+       private void runSparsityEstimateTest(SparsityEstimator estim, int n, 
double sp) {
+               MatrixBlock m1 = MatrixBlock.randOperations(m, n, sp, 1, 1, 
"uniform", 3);
+               MatrixBlock m3 = m1.aggregateBinaryOperations(m1, m1, 
+                       new MatrixBlock(), 
InstructionUtils.getMatMultOperator(1));
+               double spExact = OptimizerUtils.getSparsity(m, m,
+                       EstimationUtils.getSelfProductOutputNnz(m1));
+               
+               //compare estimated and real sparsity
+               double est = estim.estim(m1, m1);
+               TestUtils.compareScalars(est, m3.getSparsity(),
+                       (estim instanceof EstimatorBitsetMM) ? eps3 : //exact
+                       (estim instanceof EstimatorBasicWorst) ? eps1 : eps2);
+               TestUtils.compareScalars(m3.getSparsity(), spExact, eps3);
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/44a1a67d/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
----------------------------------------------------------------------
diff --git 
a/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
index 82d1891..2842905 100644
--- 
a/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
+++ 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/estim/ZPackageSuite.java
@@ -27,6 +27,7 @@ import org.junit.runners.Suite;
 @RunWith(Suite.class)
 @Suite.SuiteClasses({
        OuterProductTest.class,
+       SelfProductTest.class,
        SquaredProductChainTest.class,
        SquaredProductTest.class,
 })

Reply via email to