This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new df7b2cc  [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices
df7b2cc is described below

commit df7b2cc932d144e5c2dfe024d5ddc7d21a202b51
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Mar 23 00:23:45 2021 +0100

    [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices
    
    The dense and sparse memory estimates are used for multiple purposes
    including selecting distributed operations if needed. A recent change of
    these memory estimates introduced a few issues which lead overflows
    estimates memory of ultra-sparse matrices (which dense estimate exceed
    int max, and in some cases long max).
    
    * Fix int overflows on estimating memory of dense and spare matrices
    * Fix incorrect estimates used by the compiler (header in wrong place)
    * Fix missing long overflow checks
---
 .../runtime/compress/colgroup/Dictionary.java      |  2 +-
 .../org/apache/sysds/runtime/data/DenseBlock.java  | 18 +++++-----
 .../apache/sysds/runtime/data/DenseBlockFP64.java  | 13 +++----
 .../sysds/runtime/data/DenseBlockFactory.java      |  7 ++++
 .../org/apache/sysds/runtime/data/SparseBlock.java |  5 ---
 .../apache/sysds/runtime/data/SparseBlockCOO.java  |  8 ++---
 .../apache/sysds/runtime/data/SparseBlockCSR.java  | 10 +++---
 .../sysds/runtime/data/SparseBlockFactory.java     |  6 ++--
 .../apache/sysds/runtime/data/SparseBlockMCSR.java | 17 +++++----
 .../sysds/runtime/matrix/data/MatrixBlock.java     | 42 ++++++++++++----------
 .../org/apache/sysds/utils/MemoryEstimates.java    | 16 ++++-----
 .../test/component/misc/MemoryEstimateTest.java    |  4 +--
 .../component/sparse/SparseBlockMemEstimate.java   |  4 +--
 13 files changed, 79 insertions(+), 73 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
index 34fea3d..5bf1cde 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
@@ -63,7 +63,7 @@ public class Dictionary extends ADictionary {
 
        protected static long getInMemorySize(int valuesCount) {
                // object + values array
-               return 16 + MemoryEstimates.doubleArrayCost(valuesCount);
+               return 16 + (long)MemoryEstimates.doubleArrayCost(valuesCount);
        }
 
        @Override
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
index 0d414fa..53acc47 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
@@ -155,6 +155,15 @@ public abstract class DenseBlock implements Serializable
         */
        public abstract void reset(int rlen, int[] odims, double v);
        
+       
+       public static double estimateMemory(long nrows, long ncols){
+               long size = 16; // object
+               size += 4; // int
+               size += 4; // padding
+               size += MemoryEstimates.intArrayCost(1); // odims typically 1
+               size += 8; // pointer to reuse that is typically null;
+               return size;
+       }
 
        /**
         * Set the dimensions of the dense MatrixBlock.
@@ -675,13 +684,4 @@ public abstract class DenseBlock implements Serializable
                }
                return ret;
        }
-
-       public static long estimateSizeDenseInMemory(int nRows, int nCols){
-               long size = 16; // object
-               size += 4; // int
-               size += 4; // padding
-               size += MemoryEstimates.intArrayCost(1); // odims typically 1
-               size += 8; // pointer to reuse that is typically null;
-               return size;
-       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
index 795bee1..fbb8286 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
@@ -65,6 +65,13 @@ public class DenseBlockFP64 extends DenseBlockDRB
                _rlen = rlen;
                _odims = odims;
        }
+       
+       public static double estimateMemory(long nrows, long ncols) {
+               if( (double)nrows + ncols > Long.MAX_VALUE )
+                       return Long.MAX_VALUE;
+               return DenseBlock.estimateMemory(nrows, ncols)
+                       + MemoryEstimates.doubleArrayCost(nrows * ncols);
+       }
 
        @Override
        public long capacity() {
@@ -193,10 +200,4 @@ public class DenseBlockFP64 extends DenseBlockDRB
        public long getLong(int[] ix) {
                return UtilFunctions.toLong(_data[pos(ix)]);
        }
-       
-       public static long estimateSizeDenseInMemory(int nRows, int nCols){
-               long size = DenseBlock.estimateSizeDenseInMemory(nRows, 
nCols);// pointer to reuse that is typically null;
-               size += MemoryEstimates.doubleArrayCost(nRows * nCols);
-               return size;
-       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
index f9c0e0a..e840585 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
@@ -112,4 +112,11 @@ public abstract class DenseBlockFactory
                return (dblock instanceof DenseBlockDRB) ? DenseBlock.Type.DRB :
                        (dblock instanceof DenseBlockLDRB) ? 
DenseBlock.Type.LDRB : null;
        }
+
+       public static double estimateSizeDenseInMemory(long nrows, long ncols) {
+               // estimating the size of a dense matrix by the basic block 
estimate
+               // is a good approximation as for large, partitioned dense 
blocks, the
+               // array headers are in the noise and can be ignored
+               return DenseBlockFP64.estimateMemory(nrows, ncols);
+       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index 4375cad..d876946 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -22,8 +22,6 @@ package org.apache.sysds.runtime.data;
 import java.io.Serializable;
 import java.util.Iterator;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.matrix.data.IJV;
 
 /**
@@ -39,9 +37,6 @@ import org.apache.sysds.runtime.matrix.data.IJV;
  */
 public abstract class SparseBlock implements Serializable
 {
-
-       protected static final Log LOG = 
LogFactory.getLog(SparseBlock.class.getName());
-
        private static final long serialVersionUID = -5008747088111141395L;
        
        //internal configuration parameters for all sparse blocks
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
index ce8e707..aea34f9 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
@@ -145,14 +145,14 @@ public class SparseBlockCOO extends SparseBlock
         * @param sparsity sparsity ratio
         * @return memory estimate
         */
-       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
+       public static long estimateSizeInMemory(long nrows, long ncols, double 
sparsity) {
                double lnnz = Math.max(INIT_CAPACITY, 
Math.ceil(sparsity*nrows*ncols));
                
                //32B overhead per array, int/int/double arr in nnz 
                double size = 16 + 8;   //object + 2 int fields
-               size += MemoryEstimates.intArrayCost((int)lnnz); //rindexes 
array (row indexes)
-               size += MemoryEstimates.intArrayCost((int) lnnz); //cindexes 
array (column indexes)
-               size += MemoryEstimates.doubleArrayCost((int) lnnz); //values 
array (non-zero values)
+               size += MemoryEstimates.intArrayCost((long)lnnz); //rindexes 
array (row indexes)
+               size += MemoryEstimates.intArrayCost((long)lnnz); //cindexes 
array (column indexes)
+               size += MemoryEstimates.doubleArrayCost((long)lnnz); //values 
array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
index 6cf474f..6e4e803 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
@@ -264,14 +264,14 @@ public class SparseBlockCSR extends SparseBlock
         * @param sparsity sparsity ratio
         * @return memory estimate
         */
-       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
+       public static long estimateSizeInMemory(long nrows, long ncols, double 
sparsity) {
                double lnnz = Math.max(INIT_CAPACITY, 
Math.ceil(sparsity*nrows*ncols));
                
                //32B overhead per array, int arr in nrows, int/double arr in 
nnz 
-               double size = 16 + 4 + 4;        //object + int field + padding
-               size += MemoryEstimates.intArrayCost((int)nrows+1); //ptr array 
(row pointers)
-               size += MemoryEstimates.intArrayCost((int) lnnz);   //indexes 
array (column indexes)
-               size += MemoryEstimates.doubleArrayCost((int) lnnz);//values 
array (non-zero values)
+               double size = 16 + 4 + 4;                            //object + 
int field + padding
+               size += MemoryEstimates.intArrayCost(nrows+1);       //ptr 
array (row pointers)
+               size += MemoryEstimates.intArrayCost((long) lnnz);   //indexes 
array (column indexes)
+               size += MemoryEstimates.doubleArrayCost((long) lnnz);//values 
array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
diff --git 
a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
index 38453c8..fdaf3d7 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
@@ -77,9 +77,9 @@ public abstract class SparseBlockFactory
 
        public static long estimateSizeSparseInMemory(SparseBlock.Type type, 
long nrows, long ncols, double sparsity) {
                switch( type ) {
-                       case MCSR: return SparseBlockMCSR.estimateMemory(nrows, 
ncols, sparsity);
-                       case CSR: return SparseBlockCSR.estimateMemory(nrows, 
ncols, sparsity);
-                       case COO: return SparseBlockCOO.estimateMemory(nrows, 
ncols, sparsity);
+                       case MCSR: return 
SparseBlockMCSR.estimateSizeInMemory(nrows, ncols, sparsity);
+                       case CSR: return 
SparseBlockCSR.estimateSizeInMemory(nrows, ncols, sparsity);
+                       case COO: return 
SparseBlockCOO.estimateSizeInMemory(nrows, ncols, sparsity);
                        default:
                                throw new RuntimeException("Unexpected sparse 
block type: "+type.toString());
                }
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index fda83bf..ddab780 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -99,22 +99,21 @@ public class SparseBlockMCSR extends SparseBlock
         * @param sparsity sparsity ratio
         * @return memory estimate
         */
-       public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
-               int cnnz = Math.max(SparseRowVector.initialCapacity, (int) 
Math.ceil(sparsity*ncols));
-               double rlen = Math.min(nrows,  Math.ceil(sparsity*nrows*ncols));
+       public static long estimateSizeInMemory(long nrows, long ncols, double 
sparsity) {
+               double cnnz = Math.max(SparseRowVector.initialCapacity, 
Math.ceil(sparsity*ncols));
+               double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
                
                //Each sparse row has a fixed overhead of 16B (object) + 12B (3 
ints),
                //24B (int array), 24B (double array), i.e., in total 76B
                //Each non-zero value requires 12B for the column-index/value 
pair.
                //Overheads for arrays, objects, and references refer to 64bit 
JVMs
                //If nnz < rows we have guaranteed also empty rows.
-               double size = 16;                //object
-               size += MemoryEstimates.objectArrayCost((int)rlen);         
//references
+               double size = 16; //object
+               size += MemoryEstimates.objectArrayCost((long)rlen); 
//references
                long sparseRowSize = 16; // object
-               sparseRowSize += MemoryEstimates.intArrayCost(cnnz);
-               sparseRowSize += MemoryEstimates.doubleArrayCost(cnnz);
-               sparseRowSize += 4*3; // integers.
-               sparseRowSize += 4; // padding to nearest 8 byte.
+               sparseRowSize += MemoryEstimates.intArrayCost((long)cnnz);
+               sparseRowSize += MemoryEstimates.doubleArrayCost((long)cnnz);
+               sparseRowSize += 4*4; // 3 integers + padding
                size += rlen * sparseRowSize; //sparse rows
                
                // robustness for long overflows
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index ff8cf31..ab83d12 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -52,7 +52,6 @@ import 
org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysds.runtime.controlprogram.caching.LazyWriteBuffer;
 import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType;
 import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.data.DenseBlockFP64;
 import org.apache.sysds.runtime.data.DenseBlockFactory;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.data.SparseBlockCOO;
@@ -2425,6 +2424,16 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        ////////
        // Estimates size and sparsity
 
+       public static long getHeaderSize() {
+               // basic variables and references sizes
+               long size = 16; // header
+               size += 12; // ints
+               size += 1; // boolean
+               size += 3; // padding
+               size += 8 * 2; // object references
+               return size;
+       }
+       
        public long estimateSizeInMemory() {
                return estimateSizeInMemory(rlen, clen, getSparsity());
        }
@@ -2434,34 +2443,29 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                //determine sparse/dense representation
                boolean sparse = evalSparseFormatInMemory(nrows, ncols, 
(long)(sparsity*nrows*ncols));
 
-               // basic variables and references sizes
-               long size = 16; // header
-               size += 12; // ints
-               size += 1; // boolean
-               size += 3; // padding
-               size += 8 * 2; // object references             
-
                //estimate memory consumption for sparse/dense
                if( sparse )
-                       return size + estimateSizeSparseInMemory(nrows, ncols, 
sparsity);
+                       return estimateSizeSparseInMemory(nrows, ncols, 
sparsity);
                else
-                       return size + estimateSizeDenseInMemory(nrows, ncols);
+                       return estimateSizeDenseInMemory(nrows, ncols);
        }
 
-       public static long estimateSizeDenseInMemory(long nrows, long ncols)
-       {
-               return (long) 
Math.min(DenseBlockFP64.estimateSizeDenseInMemory((int)nrows, (int)ncols), 
Long.MAX_VALUE);
+       public static long estimateSizeDenseInMemory(long nrows, long ncols) {
+               double size = getHeaderSize()
+                       + DenseBlockFactory.estimateSizeDenseInMemory(nrows, 
ncols);
+               // robustness for long overflows
+               return (long) Math.min(size, Long.MAX_VALUE);
        }
 
        public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity) {
                return estimateSizeSparseInMemory(nrows, ncols, sparsity, 
DEFAULT_SPARSEBLOCK);
        }
        
-       public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity, SparseBlock.Type stype)
-       {
-               // delegate memory estimate to individual sparse blocks
-               return Math.min(SparseBlockFactory.estimateSizeSparseInMemory(
-                       stype, nrows, ncols, sparsity),Long.MAX_VALUE);
+       public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity, SparseBlock.Type stype) {
+               double size = getHeaderSize()
+                       + SparseBlockFactory.estimateSizeSparseInMemory(stype, 
nrows, ncols, sparsity);
+               // robustness for long overflows
+               return (long) Math.min(size, Long.MAX_VALUE);
        }
 
        public long estimateSizeOnDisk()
@@ -5369,7 +5373,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                double w = thatScalar;
                
                //sparse-unsafe ctable execution
-               //(because input values of 0 are invalid and have to result in 
errors)          
+               //(because input values of 0 are invalid and have to result in 
errors)
                //resultBlock guaranteed to be allocated for ctableexpand
                //each row in resultBlock will be allocated and will contain 
exactly one value
                int maxCol = 0;
diff --git a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java 
b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
index 473332f..0a067e4 100644
--- a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
+++ b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
@@ -97,15 +97,15 @@ public class MemoryEstimates {
         * @param length The length of the array.
         * @return The memory estimate in bytes
         */
-       public static long intArrayCost(int length) {
-               long size = 0;
+       public static double intArrayCost(long length) {
+               double size = 0;
                size += 8; // _ptr int[] reference
                size += 20; // int array Object header
                if(length <= 1) {
                        size += 4;
                }
                else {
-                       size += length * 4; // offsets 4 bytes per int
+                       size += 4d * length; // offsets 4 bytes per int
                        if(length % 2 == 0) {
                                size += 4;
                        }
@@ -119,12 +119,12 @@ public class MemoryEstimates {
         * @param length The length of the array.
         * @return The memory estimate in bytes
         */
-       public static long doubleArrayCost(int length) {
-               long size = 0;
+       public static double doubleArrayCost(long length) {
+               double size = 0;
                size += 8; // _values double array reference
                size += 20; // double array object header
                size += 4; // padding inside double array object to align to 8 
bytes.
-               size += 8 * length; // Each double fills 8 Bytes
+               size += 8d * length; // Each double fills 8 Bytes
                return size;
        }
 
@@ -134,7 +134,7 @@ public class MemoryEstimates {
         * @param length The length of the array.
         * @return The memory estimate in bytes
         */
-       public static long objectArrayCost(int length) {
+       public static long objectArrayCost(long length) {
                long size = 0;
                size += 8; // reference to array
                size += 20; // header
@@ -149,7 +149,7 @@ public class MemoryEstimates {
         * @param length The length of the array.
         * @return The memory estimate in bytes
         */
-       public static long longArrayCost(int length) {
+       public static double longArrayCost(int length) {
                return doubleArrayCost(length);
                // exactly the same size as a double array
        }
diff --git 
a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java 
b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
index 5ae5ed6..a2ff78a 100644
--- a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
+++ b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
@@ -80,11 +80,11 @@ public class MemoryEstimateTest {
                                break;
                        case INT:
                                int[] arrayInt = new int[length];
-                               
assertEquals(MemoryEstimates.intArrayCost(length), measure(arrayInt));
+                               
assertEquals((long)MemoryEstimates.intArrayCost(length), measure(arrayInt));
                                break;
                        case DOUBLE:
                                double[] arrayDouble = new double[length];
-                               
assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble));
+                               
assertEquals((long)MemoryEstimates.doubleArrayCost(length), 
measure(arrayDouble));
                                break;
                        default:
                                
System.out.println(arrayToMeasure.getClass().getSimpleName());
diff --git 
a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
 
b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
index 49b9fc7..ef0b567 100644
--- 
a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
+++ 
b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
@@ -35,7 +35,7 @@ import org.apache.sysds.test.TestUtils;
 public class SparseBlockMemEstimate extends AutomatedTestBase 
 {
        private final static int rows = 662;
-       private final static int cols = 444;    
+       private final static int cols = 444;
        private final static double sparsity1 = 0.39;
        private final static double sparsity2 = 0.0001;
        
@@ -88,7 +88,7 @@ public class SparseBlockMemEstimate extends AutomatedTestBase
                        if( memMCSR < memCOO )
                                Assert.fail("SparseBlockMCSR memory estimate 
smaller than SparseBlockCOO estimate.");
                        if( memCSR < memCOO )
-                               Assert.fail("SparseBlockCSR memory estimate 
smaller than SparseBlockCOO estimate.");    
+                               Assert.fail("SparseBlockCSR memory estimate 
smaller than SparseBlockCOO estimate.");
                }
        }
 }
\ No newline at end of file

Reply via email to