[SYSTEMML-383] Integrated sparse block memory estimates / valid nnz

Incl new sparse block test for relative memory estimates between MCSR,
CSR, COO, and dense formats.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9c422821
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9c422821
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9c422821

Branch: refs/heads/master
Commit: 9c4228215a2cb9b2df351a4660f36d9b63d252c8
Parents: a19a14c
Author: Matthias Boehm <[email protected]>
Authored: Sat Jan 23 01:02:32 2016 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jan 23 16:08:22 2016 -0800

----------------------------------------------------------------------
 .../org/apache/sysml/hops/OptimizerUtils.java   |  9 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 15 +--
 .../runtime/matrix/data/SparseBlockCOO.java     | 25 +++++
 .../runtime/matrix/data/SparseBlockCSR.java     | 25 +++++
 .../runtime/matrix/data/SparseBlockFactory.java | 18 ++++
 .../runtime/matrix/data/SparseBlockMCSR.java    | 26 +++++
 .../sparse/SparseBlockMemEstimate.java          | 99 ++++++++++++++++++++
 .../functions/sparse/ZPackageSuite.java         |  1 +
 8 files changed, 203 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java 
b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 19acbd1..57d38d7 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -23,7 +23,6 @@ import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.conf.ConfigurationManager;
@@ -42,6 +41,7 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
 import org.apache.sysml.runtime.matrix.data.SparseRow;
 import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
@@ -73,7 +73,10 @@ public class OptimizerUtils
        public static final double BIT_SIZE = (double)1/8;
        public static final double INVALID_SIZE = -1d; // memory estimate not 
computed
 
+       //constants for valid CP matrix dimension sizes / nnz (dense/sparse)
        public static final long MAX_NUMCELLS_CP_DENSE = Integer.MAX_VALUE;
+       public static final long MAX_NNZ_CP_SPARSE = 
(MatrixBlock.DEFAULT_SPARSEBLOCK == 
+                       SparseBlock.Type.MCSR) ? Long.MAX_VALUE : 
Integer.MAX_VALUE;
        
        /**
         * Enables/disables dynamic re-compilation of lops/instructions.
@@ -862,8 +865,8 @@ public class OptimizerUtils
                
                if( sparse ) //SPARSE
                {
-                       //check max nnz
-                       ret = (nnz <= Long.MAX_VALUE);
+                       //check max nnz (dependent on sparse block format)
+                       ret = (nnz <= MAX_NNZ_CP_SPARSE);
                }
                else //DENSE
                {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 22c6347..9a7bfcb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2556,18 +2556,9 @@ public class MatrixBlock extends MatrixValue implements 
Externalizable
                // basic variables and references sizes
                double size = 44;
                
-               //NOTES:
-               // * Each sparse row has a fixed overhead of 8B (reference) + 
32B (object) +
-               //   12B (3 int members), 32B (overhead int array), 32B 
(overhead double array),
-               // * Each non-zero value requires 12B for the 
column-index/value pair.
-               // * Overheads for arrays, objects, and references refer to 
64bit JVMs
-               // * If nnz < than rows we have only also empty rows.
-               
-               // account for sparsity and initial capacity
-               double cnnz = Math.max(SparseRow.initialCapacity, 
Math.ceil(sparsity*ncols));
-               double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
-               size += rlen * ( 116 + 12 * cnnz ); //sparse row
-               size += nrows * 8d; //empty rows
+               // delegate memory estimate to individual sparse blocks
+               size += SparseBlockFactory.estimateSizeSparseInMemory(
+                       DEFAULT_SPARSEBLOCK, nrows, ncols, sparsity);
                
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
index eea0754..5643850 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
@@ -125,6 +125,31 @@ public class SparseBlockCOO extends SparseBlock
                        }
                }
        }
+               
+       /**
+        * Get the estimated in-memory size of the sparse block in COO 
+        * with the given dimensions w/o accounting for overallocation. 
+        * 
+        * @param nrows
+        * @param ncols
+        * @param sparsity
+        * @return
+        */
+       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
+               double lnnz = Math.max(INIT_CAPACITY, 
Math.ceil(sparsity*nrows*ncols));
+               
+               //32B overhead per array, int/int/double arr in nnz 
+               double size = 16 + 8;   //object + 2 int fields
+               size += 32 + lnnz * 4d; //rindexes array (row indexes)
+               size += 32 + lnnz * 4d; //cindexes array (column indexes)
+               size += 32 + lnnz * 8d; //values array (non-zero values)
+               
+               //robustness for long overflows
+               return (long) Math.min(size, Long.MAX_VALUE);
+       }
+       
+       ///////////////////
+       //SparseBlock implementation
        
        @Override
        public void allocate(int r) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 876b0ed..c599753 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -128,6 +128,31 @@ public class SparseBlockCSR extends SparseBlock
                }
        }
        
+       /**
+        * Get the estimated in-memory size of the sparse block in CSR 
+        * with the given dimensions w/o accounting for overallocation. 
+        * 
+        * @param nrows
+        * @param ncols
+        * @param sparsity
+        * @return
+        */
+       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
+               double lnnz = Math.max(INIT_CAPACITY, 
Math.ceil(sparsity*nrows*ncols));
+               
+               //32B overhead per array, int arr in nrows, int/double arr in 
nnz 
+               double size = 16 + 4;        //object + int field
+               size += 32 + (nrows+1) * 4d; //ptr array (row pointers)
+               size += 32 + lnnz * 4d;      //indexes array (column indexes)
+               size += 32 + lnnz * 8d;      //values array (non-zero values)
+               
+               //robustness for long overflows
+               return (long) Math.min(size, Long.MAX_VALUE);
+       }
+       
+       ///////////////////
+       //SparseBlock implementation
+
        @Override
        public void allocate(int r) {
                //do nothing everything preallocated

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
index 1ac8f16..67b3eae 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
@@ -87,4 +87,22 @@ public abstract class SparseBlockFactory
                                throw new RuntimeException("Unexpected sparse 
block type: "+type.toString());
                }
        }
+       
+       /**
+        * 
+        * @param type
+        * @param nrows
+        * @param ncols
+        * @param sparsity
+        * @return
+        */
+       public static long estimateSizeSparseInMemory(SparseBlock.Type type, 
long nrows, long ncols, double sparsity) {
+               switch( type ) {
+                       case MCSR: return SparseBlockMCSR.estimateMemory(nrows, 
ncols, sparsity);
+                       case CSR: return SparseBlockCSR.estimateMemory(nrows, 
ncols, sparsity);
+                       case COO: return SparseBlockCOO.estimateMemory(nrows, 
ncols, sparsity);
+                       default:
+                               throw new RuntimeException("Unexpected sparse 
block type: "+type.toString());
+               }
+       }
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 378a3f4..6e1cded 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -80,6 +80,32 @@ public class SparseBlockMCSR extends SparseBlock
        public SparseBlockMCSR(int rlen, int clen) {
                _rows = new SparseRow[rlen];
        }
+       
+       /**
+        * Get the estimated in-memory size of the sparse block in MCSR 
+        * with the given dimensions w/o accounting for overallocation. 
+        * 
+        * @param nrows
+        * @param ncols
+        * @param sparsity
+        * @return
+        */
+       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
+               double cnnz = Math.max(SparseRow.initialCapacity, 
Math.ceil(sparsity*ncols));
+               double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+               
+               //Each sparse row has a fixed overhead of 8B (reference) + 32B 
(object) +
+               //12B (3 int members), 32B (overhead int array), 32B (overhead 
double array),
+               //Each non-zero value requires 12B for the column-index/value 
pair.
+               //Overheads for arrays, objects, and references refer to 64bit 
JVMs
+               //If nnz < than rows we have only also empty rows.
+               double size = 16;                 //object
+               size += rlen * (116 + cnnz * 12); //sparse rows
+               size += 32 + nrows * 8d;          //references
+               
+               // robustness for long overflows
+               return (long) Math.min(size, Long.MAX_VALUE);
+       }
 
        ///////////////////
        //SparseBlock implementation

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
new file mode 100644
index 0000000..8d2fbd0
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.sparse;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlockFactory;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * This is a sparse matrix block component test for sparse block memory
+ * estimation functionality.
+ * 
+ */
+public class SparseBlockMemEstimate extends AutomatedTestBase 
+{
+       private final static int rows = 662;
+       private final static int cols = 444;    
+       private final static double sparsity1 = 0.39;
+       private final static double sparsity2 = 0.0001;
+       
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+       }
+
+       @Test
+       public void testSparseBlockSparse()  {
+               runSparseBlockMemoryTest(sparsity1);
+       }
+       
+       @Test
+       public void testSparseBlockUltraSparse()  {
+               runSparseBlockMemoryTest(sparsity2);
+       }
+               
+       /**
+        * 
+        * @param btype
+        * @param sparsity
+        */
+       private void runSparseBlockMemoryTest( double sparsity)
+       {
+               double memMCSR = 
SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSR, rows, 
cols, sparsity);
+               double memCSR = 
SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.CSR, rows, cols, 
sparsity);
+               double memCOO = 
SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.COO, rows, cols, 
sparsity);
+               double memDense = MatrixBlock.estimateSizeDenseInMemory(rows, 
cols);
+               
+               //check negative estimate
+               if( memMCSR <= 0 )
+                       Assert.fail("SparseBlockMCSR memory estimate <= 0.");
+               if( memCSR  <= 0 )
+                       Assert.fail("SparseBlockCSR memory estimate <= 0.");
+               if( memCOO  <= 0 )
+                       Assert.fail("SparseBlockCOO memory estimate <= 0.");
+               
+               //check dense estimate
+               if( memMCSR > memDense )
+                       Assert.fail("SparseBlockMCSR memory estimate larger 
than dense estimate.");
+               if( memCSR > memDense )
+                       Assert.fail("SparseBlockCSR memory estimate larger than 
dense estimate.");
+               if( memCOO > memDense )
+                       Assert.fail("SparseBlockCOO memory estimate larger than 
dense estimate.");
+               
+               //check sparse estimates relations
+               if( sparsity == sparsity1 ) { //sparse (pref CSR)
+                       if( memMCSR < memCSR )
+                               Assert.fail("SparseBlockMCSR memory estimate 
smaller than SparseBlockCSR estimate.");
+                       if( memCOO < memCSR )
+                               Assert.fail("SparseBlockCOO memory estimate 
smaller than SparseBlockCSR estimate.");
+               }
+               else { //ultra-sparse (pref COO)
+                       if( memMCSR < memCOO )
+                               Assert.fail("SparseBlockMCSR memory estimate 
smaller than SparseBlockCOO estimate.");
+                       if( memCSR < memCOO )
+                               Assert.fail("SparseBlockCSR memory estimate 
smaller than SparseBlockCOO estimate.");    
+               }
+       }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
----------------------------------------------------------------------
diff --git 
a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
index 633e152..01fc461 100644
--- 
a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
+++ 
b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
@@ -32,6 +32,7 @@ import org.junit.runners.Suite;
        SparseBlockGetSet.class,
        SparseBlockIndexRange.class,
        SparseBlockIterator.class,
+       SparseBlockMemEstimate.class,
        SparseBlockScan.class,
        SparseBlockSize.class,
 })

Reply via email to