This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d36d98  [SYSTEMDS-2907] Fix memory estimates dense and sparse 
matrices, part 2
3d36d98 is described below

commit 3d36d9856eb6eac6eef5d09c6293d8d7659c93ae
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Mar 23 23:44:27 2021 +0100

    [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2
    
    This patch applies some additional fixes to create consistency and avoid
    biased estimates that again created problems with ultra-sparse,
    distributed matrices (w/ billions of blocks):
    
    * Fix inconsistent header size for empty (non-allocated) blocks
    * Fix missing nnz attribute in matrix block header size
    * Fix biased estimate of MCSR sparse rows (underestimated nnz per sparse
    row if #sparserows < rows, and underestimated sparse row array sizes due
    to a systematic down-cast to long, although the non-zeros are balanced
    across sparse rows)
---
 .../java/org/apache/sysds/runtime/data/SparseBlockMCSR.java | 12 +++++++-----
 .../org/apache/sysds/runtime/matrix/data/MatrixBlock.java   | 13 +++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index ddab780..77caaec 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -100,8 +100,9 @@ public class SparseBlockMCSR extends SparseBlock
         * @return memory estimate
         */
        public static long estimateSizeInMemory(long nrows, long ncols, double 
sparsity) {
-               double cnnz = Math.max(SparseRowVector.initialCapacity, 
Math.ceil(sparsity*ncols));
-               double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+               double nnz = Math.ceil(sparsity*nrows*ncols);
+               double rlen = Math.min(nrows, nnz); // num sparse row objects
+               double cnnz = Math.max(SparseRowVector.initialCapacity, 
nnz/rlen);
                
                //Each sparse row has a fixed overhead of 16B (object) + 12B (3 
ints),
                //24B (int array), 24B (double array), i.e., in total 76B
@@ -111,11 +112,12 @@ public class SparseBlockMCSR extends SparseBlock
                double size = 16; //object
                size += MemoryEstimates.objectArrayCost((long)rlen); 
//references
                long sparseRowSize = 16; // object
-               sparseRowSize += MemoryEstimates.intArrayCost((long)cnnz);
-               sparseRowSize += MemoryEstimates.doubleArrayCost((long)cnnz);
                sparseRowSize += 4*4; // 3 integers + padding
+               sparseRowSize += MemoryEstimates.intArrayCost(0);
+               sparseRowSize += MemoryEstimates.doubleArrayCost(0);
+               sparseRowSize += 12*Math.max(1, cnnz); //avoid bias by down 
cast for ultra-sparse
                size += rlen * sparseRowSize; //sparse rows
-               
+
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index ab83d12..615b28d 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -2427,10 +2427,11 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        public static long getHeaderSize() {
                // basic variables and references sizes
                long size = 16; // header
-               size += 12; // ints
-               size += 1; // boolean
+               size += 12; // 3 x ints (rlen, clen, ennz/row)
+               size += 1; // boolean (sparse)
                size += 3; // padding
-               size += 8 * 2; // object references
+               size += 8; // nonZeros
+               size += 2 * 8; // object references
                return size;
        }
        
@@ -2462,8 +2463,8 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        }
        
        public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity, SparseBlock.Type stype) {
-               double size = getHeaderSize()
-                       + SparseBlockFactory.estimateSizeSparseInMemory(stype, 
nrows, ncols, sparsity);
+               double size = getHeaderSize() + ((sparsity == 0) ? 0 : 
//allocated on demand
+                       SparseBlockFactory.estimateSizeSparseInMemory(stype, 
nrows, ncols, sparsity));
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
        }
@@ -2618,7 +2619,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        public long getInMemorySize() {
                //in-memory size given by header if not allocated
                if( !isAllocated() ) 
-                       return 44;
+                       return getHeaderSize();
                //in-memory size of dense/sparse representation
                return !sparse ? estimateSizeDenseInMemory(rlen, clen) :
                        estimateSizeSparseInMemory(rlen, clen, getSparsity(),

Reply via email to