[SYSTEMML-2263] Fix too conservative sparse block size estimates This patch fixes outdated and too conservative size estimates of all sparse block formats. In contrast to previous assumptions, we now assume an array to require a header of 24 bytes and every object 16 bytes (which is still sufficiently conservative with respect to potential padding). Overall, this improves performance in two dimensions: (1) avoid unnecessary distributed operations although the sparse matrix would fit in CP, and (2) avoid unnecessary buffer pool serialization due to exceeded in-memory/on-disk size overhead thresholds.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/45d86bd2 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/45d86bd2 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/45d86bd2 Branch: refs/heads/master Commit: 45d86bd20bac85bd1129813219925a2a8cbdf45a Parents: ba06d05 Author: Matthias Boehm <[email protected]> Authored: Fri Apr 20 00:37:39 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Apr 20 00:37:39 2018 -0700 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/SparseBlockCOO.java | 6 +++--- .../sysml/runtime/matrix/data/SparseBlockCSR.java | 6 +++--- .../sysml/runtime/matrix/data/SparseBlockMCSR.java | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java index b2a234a..1c8e3fe 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java @@ -146,9 +146,9 @@ public class SparseBlockCOO extends SparseBlock //32B overhead per array, int/int/double arr in nnz double size = 16 + 8; //object + 2 int fields - size += 32 + lnnz * 4d; //rindexes array (row indexes) - size += 32 + lnnz * 4d; //cindexes array (column indexes) - size += 32 + lnnz * 8d; //values array (non-zero values) + size += 24 + lnnz * 4d; //rindexes array (row indexes) + size += 24 + lnnz * 4d; //cindexes array (column indexes) + size += 24 + lnnz * 8d; //values array (non-zero values) //robustness for 
long overflows return (long) Math.min(size, Long.MAX_VALUE); http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java index 6bbc81d..1365f95 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java @@ -269,9 +269,9 @@ public class SparseBlockCSR extends SparseBlock //32B overhead per array, int arr in nrows, int/double arr in nnz double size = 16 + 4; //object + int field - size += 32 + (nrows+1) * 4d; //ptr array (row pointers) - size += 32 + lnnz * 4d; //indexes array (column indexes) - size += 32 + lnnz * 8d; //values array (non-zero values) + size += 24 + (nrows+1) * 4d; //ptr array (row pointers) + size += 24 + lnnz * 4d; //indexes array (column indexes) + size += 24 + lnnz * 8d; //values array (non-zero values) //robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE); http://git-wip-us.apache.org/repos/asf/systemml/blob/45d86bd2/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java index fe63f2b..4cbf49a 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java @@ -102,14 +102,14 @@ public class SparseBlockMCSR extends SparseBlock double cnnz = Math.max(SparseRowVector.initialCapacity, Math.ceil(sparsity*ncols)); double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); - //Each sparse 
row has a fixed overhead of 8B (reference) + 32B (object) + - //12B (3 int members), 32B (overhead int array), 32B (overhead double array), + //Each sparse row has a fixed overhead of 16B (object) + 12B (3 ints), + //24B (int array), 24B (double array), i.e., in total 76B //Each non-zero value requires 12B for the column-index/value pair. //Overheads for arrays, objects, and references refer to 64bit JVMs - //If nnz < than rows we have only also empty rows. - double size = 16; //object - size += rlen * (116 + cnnz * 12); //sparse rows - size += 32 + nrows * 8d; //references + //If nnz < rows we have guaranteed also empty rows. + double size = 16; //object + size += 24 + nrows * 8d; //references + size += rlen * (76 + cnnz * 12); //sparse rows // robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE);
