[SYSTEMML-2263] Fix too conservative sparse block size estimates

This patch fixes outdated and too conservative size estimates of all
sparse block formats. In contrast to previous assumptions, we now assume
that an array requires a header of 24 bytes and every object 16 bytes
(which is still sufficiently conservative with respect to potential padding).
Overall, this improves performance in two dimensions: (1) it avoids
unnecessary distributed operations when the sparse matrix would actually
fit in CP, and (2) it avoids unnecessary buffer pool serialization due to
exceeded in-memory/on-disk size overhead thresholds.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/45d86bd2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/45d86bd2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/45d86bd2

Branch: refs/heads/master
Commit: 45d86bd20bac85bd1129813219925a2a8cbdf45a
Parents: ba06d05
Author: Matthias Boehm <[email protected]>
Authored: Fri Apr 20 00:37:39 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Apr 20 00:37:39 2018 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/SparseBlockCOO.java       |  6 +++---
 .../sysml/runtime/matrix/data/SparseBlockCSR.java       |  6 +++---
 .../sysml/runtime/matrix/data/SparseBlockMCSR.java      | 12 ++++++------
 3 files changed, 12 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
index b2a234a..1c8e3fe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
@@ -146,9 +146,9 @@ public class SparseBlockCOO extends SparseBlock
                
                //32B overhead per array, int/int/double arr in nnz 
                double size = 16 + 8;   //object + 2 int fields
-               size += 32 + lnnz * 4d; //rindexes array (row indexes)
-               size += 32 + lnnz * 4d; //cindexes array (column indexes)
-               size += 32 + lnnz * 8d; //values array (non-zero values)
+               size += 24 + lnnz * 4d; //rindexes array (row indexes)
+               size += 24 + lnnz * 4d; //cindexes array (column indexes)
+               size += 24 + lnnz * 8d; //values array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);

http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 6bbc81d..1365f95 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -269,9 +269,9 @@ public class SparseBlockCSR extends SparseBlock
                
                //32B overhead per array, int arr in nrows, int/double arr in 
nnz 
                double size = 16 + 4;        //object + int field
-               size += 32 + (nrows+1) * 4d; //ptr array (row pointers)
-               size += 32 + lnnz * 4d;      //indexes array (column indexes)
-               size += 32 + lnnz * 8d;      //values array (non-zero values)
+               size += 24 + (nrows+1) * 4d; //ptr array (row pointers)
+               size += 24 + lnnz * 4d;      //indexes array (column indexes)
+               size += 24 + lnnz * 8d;      //values array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);

http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index fe63f2b..4cbf49a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -102,14 +102,14 @@ public class SparseBlockMCSR extends SparseBlock
                double cnnz = Math.max(SparseRowVector.initialCapacity, 
Math.ceil(sparsity*ncols));
                double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
                
-               //Each sparse row has a fixed overhead of 8B (reference) + 32B 
(object) +
-               //12B (3 int members), 32B (overhead int array), 32B (overhead 
double array),
+               //Each sparse row has a fixed overhead of 16B (object) + 12B (3 
ints),
+               //24B (int array), 24B (double array), i.e., in total 76B
                //Each non-zero value requires 12B for the column-index/value 
pair.
                //Overheads for arrays, objects, and references refer to 64bit 
JVMs
-               //If nnz < than rows we have only also empty rows.
-               double size = 16;                 //object
-               size += rlen * (116 + cnnz * 12); //sparse rows
-               size += 32 + nrows * 8d;          //references
+               //If nnz < rows we have guaranteed also empty rows.
+               double size = 16;                //object
+               size += 24 + nrows * 8d;         //references
+               size += rlen * (76 + cnnz * 12); //sparse rows
                
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);

Reply via email to