This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new df7b2cc [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices
df7b2cc is described below
commit df7b2cc932d144e5c2dfe024d5ddc7d21a202b51
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Mar 23 00:23:45 2021 +0100
[SYSTEMDS-2907] Fix memory estimates dense and sparse matrices
The dense and sparse memory estimates are used for multiple purposes
including selecting distributed operations if needed. A recent change of
these memory estimates introduced a few issues which lead overflows
estimates memory of ultra-sparse matrices (which dense estimate exceed
int max, and in some cases long max).
* Fix int overflows on estimating memory of dense and spare matrices
* Fix incorrect estimates used by the compiler (header in wrong place)
* Fix missing long overflow checks
---
.../runtime/compress/colgroup/Dictionary.java | 2 +-
.../org/apache/sysds/runtime/data/DenseBlock.java | 18 +++++-----
.../apache/sysds/runtime/data/DenseBlockFP64.java | 13 +++----
.../sysds/runtime/data/DenseBlockFactory.java | 7 ++++
.../org/apache/sysds/runtime/data/SparseBlock.java | 5 ---
.../apache/sysds/runtime/data/SparseBlockCOO.java | 8 ++---
.../apache/sysds/runtime/data/SparseBlockCSR.java | 10 +++---
.../sysds/runtime/data/SparseBlockFactory.java | 6 ++--
.../apache/sysds/runtime/data/SparseBlockMCSR.java | 17 +++++----
.../sysds/runtime/matrix/data/MatrixBlock.java | 42 ++++++++++++----------
.../org/apache/sysds/utils/MemoryEstimates.java | 16 ++++-----
.../test/component/misc/MemoryEstimateTest.java | 4 +--
.../component/sparse/SparseBlockMemEstimate.java | 4 +--
13 files changed, 79 insertions(+), 73 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
index 34fea3d..5bf1cde 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
@@ -63,7 +63,7 @@ public class Dictionary extends ADictionary {
protected static long getInMemorySize(int valuesCount) {
// object + values array
- return 16 + MemoryEstimates.doubleArrayCost(valuesCount);
+ return 16 + (long)MemoryEstimates.doubleArrayCost(valuesCount);
}
@Override
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
index 0d414fa..53acc47 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
@@ -155,6 +155,15 @@ public abstract class DenseBlock implements Serializable
*/
public abstract void reset(int rlen, int[] odims, double v);
+
+ public static double estimateMemory(long nrows, long ncols){
+ long size = 16; // object
+ size += 4; // int
+ size += 4; // padding
+ size += MemoryEstimates.intArrayCost(1); // odims typically 1
+ size += 8; // pointer to reuse that is typically null;
+ return size;
+ }
/**
* Set the dimensions of the dense MatrixBlock.
@@ -675,13 +684,4 @@ public abstract class DenseBlock implements Serializable
}
return ret;
}
-
- public static long estimateSizeDenseInMemory(int nRows, int nCols){
- long size = 16; // object
- size += 4; // int
- size += 4; // padding
- size += MemoryEstimates.intArrayCost(1); // odims typically 1
- size += 8; // pointer to reuse that is typically null;
- return size;
- }
}
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
index 795bee1..fbb8286 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
@@ -65,6 +65,13 @@ public class DenseBlockFP64 extends DenseBlockDRB
_rlen = rlen;
_odims = odims;
}
+
+ public static double estimateMemory(long nrows, long ncols) {
+ if( (double)nrows + ncols > Long.MAX_VALUE )
+ return Long.MAX_VALUE;
+ return DenseBlock.estimateMemory(nrows, ncols)
+ + MemoryEstimates.doubleArrayCost(nrows * ncols);
+ }
@Override
public long capacity() {
@@ -193,10 +200,4 @@ public class DenseBlockFP64 extends DenseBlockDRB
public long getLong(int[] ix) {
return UtilFunctions.toLong(_data[pos(ix)]);
}
-
- public static long estimateSizeDenseInMemory(int nRows, int nCols){
- long size = DenseBlock.estimateSizeDenseInMemory(nRows,
nCols);// pointer to reuse that is typically null;
- size += MemoryEstimates.doubleArrayCost(nRows * nCols);
- return size;
- }
}
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
index f9c0e0a..e840585 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFactory.java
@@ -112,4 +112,11 @@ public abstract class DenseBlockFactory
return (dblock instanceof DenseBlockDRB) ? DenseBlock.Type.DRB :
(dblock instanceof DenseBlockLDRB) ?
DenseBlock.Type.LDRB : null;
}
+
+ public static double estimateSizeDenseInMemory(long nrows, long ncols) {
+ // estimating the size of a dense matrix by the basic block
estimate
+ // is a good approximation as for large, partitioned dense
blocks, the
+ // array headers are in the noise and can be ignored
+ return DenseBlockFP64.estimateMemory(nrows, ncols);
+ }
}
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index 4375cad..d876946 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -22,8 +22,6 @@ package org.apache.sysds.runtime.data;
import java.io.Serializable;
import java.util.Iterator;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.sysds.runtime.matrix.data.IJV;
/**
@@ -39,9 +37,6 @@ import org.apache.sysds.runtime.matrix.data.IJV;
*/
public abstract class SparseBlock implements Serializable
{
-
- protected static final Log LOG =
LogFactory.getLog(SparseBlock.class.getName());
-
private static final long serialVersionUID = -5008747088111141395L;
//internal configuration parameters for all sparse blocks
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
index ce8e707..aea34f9 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
@@ -145,14 +145,14 @@ public class SparseBlockCOO extends SparseBlock
* @param sparsity sparsity ratio
* @return memory estimate
*/
- public static long estimateMemory(long nrows, long ncols, double
sparsity) {
+ public static long estimateSizeInMemory(long nrows, long ncols, double
sparsity) {
double lnnz = Math.max(INIT_CAPACITY,
Math.ceil(sparsity*nrows*ncols));
//32B overhead per array, int/int/double arr in nnz
double size = 16 + 8; //object + 2 int fields
- size += MemoryEstimates.intArrayCost((int)lnnz); //rindexes
array (row indexes)
- size += MemoryEstimates.intArrayCost((int) lnnz); //cindexes
array (column indexes)
- size += MemoryEstimates.doubleArrayCost((int) lnnz); //values
array (non-zero values)
+ size += MemoryEstimates.intArrayCost((long)lnnz); //rindexes
array (row indexes)
+ size += MemoryEstimates.intArrayCost((long)lnnz); //cindexes
array (column indexes)
+ size += MemoryEstimates.doubleArrayCost((long)lnnz); //values
array (non-zero values)
//robustness for long overflows
return (long) Math.min(size, Long.MAX_VALUE);
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
index 6cf474f..6e4e803 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
@@ -264,14 +264,14 @@ public class SparseBlockCSR extends SparseBlock
* @param sparsity sparsity ratio
* @return memory estimate
*/
- public static long estimateMemory(long nrows, long ncols, double
sparsity) {
+ public static long estimateSizeInMemory(long nrows, long ncols, double
sparsity) {
double lnnz = Math.max(INIT_CAPACITY,
Math.ceil(sparsity*nrows*ncols));
//32B overhead per array, int arr in nrows, int/double arr in
nnz
- double size = 16 + 4 + 4; //object + int field + padding
- size += MemoryEstimates.intArrayCost((int)nrows+1); //ptr array
(row pointers)
- size += MemoryEstimates.intArrayCost((int) lnnz); //indexes
array (column indexes)
- size += MemoryEstimates.doubleArrayCost((int) lnnz);//values
array (non-zero values)
+ double size = 16 + 4 + 4; //object +
int field + padding
+ size += MemoryEstimates.intArrayCost(nrows+1); //ptr
array (row pointers)
+ size += MemoryEstimates.intArrayCost((long) lnnz); //indexes
array (column indexes)
+ size += MemoryEstimates.doubleArrayCost((long) lnnz);//values
array (non-zero values)
//robustness for long overflows
return (long) Math.min(size, Long.MAX_VALUE);
diff --git
a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
index 38453c8..fdaf3d7 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockFactory.java
@@ -77,9 +77,9 @@ public abstract class SparseBlockFactory
public static long estimateSizeSparseInMemory(SparseBlock.Type type,
long nrows, long ncols, double sparsity) {
switch( type ) {
- case MCSR: return SparseBlockMCSR.estimateMemory(nrows,
ncols, sparsity);
- case CSR: return SparseBlockCSR.estimateMemory(nrows,
ncols, sparsity);
- case COO: return SparseBlockCOO.estimateMemory(nrows,
ncols, sparsity);
+ case MCSR: return
SparseBlockMCSR.estimateSizeInMemory(nrows, ncols, sparsity);
+ case CSR: return
SparseBlockCSR.estimateSizeInMemory(nrows, ncols, sparsity);
+ case COO: return
SparseBlockCOO.estimateSizeInMemory(nrows, ncols, sparsity);
default:
throw new RuntimeException("Unexpected sparse
block type: "+type.toString());
}
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index fda83bf..ddab780 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -99,22 +99,21 @@ public class SparseBlockMCSR extends SparseBlock
* @param sparsity sparsity ratio
* @return memory estimate
*/
- public static long estimateMemory(long nrows, long ncols, double
sparsity) {
- int cnnz = Math.max(SparseRowVector.initialCapacity, (int)
Math.ceil(sparsity*ncols));
- double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+ public static long estimateSizeInMemory(long nrows, long ncols, double
sparsity) {
+ double cnnz = Math.max(SparseRowVector.initialCapacity,
Math.ceil(sparsity*ncols));
+ double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
//Each sparse row has a fixed overhead of 16B (object) + 12B (3
ints),
//24B (int array), 24B (double array), i.e., in total 76B
//Each non-zero value requires 12B for the column-index/value
pair.
//Overheads for arrays, objects, and references refer to 64bit
JVMs
//If nnz < rows we have guaranteed also empty rows.
- double size = 16; //object
- size += MemoryEstimates.objectArrayCost((int)rlen);
//references
+ double size = 16; //object
+ size += MemoryEstimates.objectArrayCost((long)rlen);
//references
long sparseRowSize = 16; // object
- sparseRowSize += MemoryEstimates.intArrayCost(cnnz);
- sparseRowSize += MemoryEstimates.doubleArrayCost(cnnz);
- sparseRowSize += 4*3; // integers.
- sparseRowSize += 4; // padding to nearest 8 byte.
+ sparseRowSize += MemoryEstimates.intArrayCost((long)cnnz);
+ sparseRowSize += MemoryEstimates.doubleArrayCost((long)cnnz);
+ sparseRowSize += 4*4; // 3 integers + padding
size += rlen * sparseRowSize; //sparse rows
// robustness for long overflows
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index ff8cf31..ab83d12 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -52,7 +52,6 @@ import
org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
import org.apache.sysds.runtime.controlprogram.caching.LazyWriteBuffer;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType;
import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.data.DenseBlockFP64;
import org.apache.sysds.runtime.data.DenseBlockFactory;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.SparseBlockCOO;
@@ -2425,6 +2424,16 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
////////
// Estimates size and sparsity
+ public static long getHeaderSize() {
+ // basic variables and references sizes
+ long size = 16; // header
+ size += 12; // ints
+ size += 1; // boolean
+ size += 3; // padding
+ size += 8 * 2; // object references
+ return size;
+ }
+
public long estimateSizeInMemory() {
return estimateSizeInMemory(rlen, clen, getSparsity());
}
@@ -2434,34 +2443,29 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
//determine sparse/dense representation
boolean sparse = evalSparseFormatInMemory(nrows, ncols,
(long)(sparsity*nrows*ncols));
- // basic variables and references sizes
- long size = 16; // header
- size += 12; // ints
- size += 1; // boolean
- size += 3; // padding
- size += 8 * 2; // object references
-
//estimate memory consumption for sparse/dense
if( sparse )
- return size + estimateSizeSparseInMemory(nrows, ncols,
sparsity);
+ return estimateSizeSparseInMemory(nrows, ncols,
sparsity);
else
- return size + estimateSizeDenseInMemory(nrows, ncols);
+ return estimateSizeDenseInMemory(nrows, ncols);
}
- public static long estimateSizeDenseInMemory(long nrows, long ncols)
- {
- return (long)
Math.min(DenseBlockFP64.estimateSizeDenseInMemory((int)nrows, (int)ncols),
Long.MAX_VALUE);
+ public static long estimateSizeDenseInMemory(long nrows, long ncols) {
+ double size = getHeaderSize()
+ + DenseBlockFactory.estimateSizeDenseInMemory(nrows,
ncols);
+ // robustness for long overflows
+ return (long) Math.min(size, Long.MAX_VALUE);
}
public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity) {
return estimateSizeSparseInMemory(nrows, ncols, sparsity,
DEFAULT_SPARSEBLOCK);
}
- public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity, SparseBlock.Type stype)
- {
- // delegate memory estimate to individual sparse blocks
- return Math.min(SparseBlockFactory.estimateSizeSparseInMemory(
- stype, nrows, ncols, sparsity),Long.MAX_VALUE);
+ public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity, SparseBlock.Type stype) {
+ double size = getHeaderSize()
+ + SparseBlockFactory.estimateSizeSparseInMemory(stype,
nrows, ncols, sparsity);
+ // robustness for long overflows
+ return (long) Math.min(size, Long.MAX_VALUE);
}
public long estimateSizeOnDisk()
@@ -5369,7 +5373,7 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
double w = thatScalar;
//sparse-unsafe ctable execution
- //(because input values of 0 are invalid and have to result in
errors)
+ //(because input values of 0 are invalid and have to result in
errors)
//resultBlock guaranteed to be allocated for ctableexpand
//each row in resultBlock will be allocated and will contain
exactly one value
int maxCol = 0;
diff --git a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
index 473332f..0a067e4 100644
--- a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
+++ b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
@@ -97,15 +97,15 @@ public class MemoryEstimates {
* @param length The length of the array.
* @return The memory estimate in bytes
*/
- public static long intArrayCost(int length) {
- long size = 0;
+ public static double intArrayCost(long length) {
+ double size = 0;
size += 8; // _ptr int[] reference
size += 20; // int array Object header
if(length <= 1) {
size += 4;
}
else {
- size += length * 4; // offsets 4 bytes per int
+ size += 4d * length; // offsets 4 bytes per int
if(length % 2 == 0) {
size += 4;
}
@@ -119,12 +119,12 @@ public class MemoryEstimates {
* @param length The length of the array.
* @return The memory estimate in bytes
*/
- public static long doubleArrayCost(int length) {
- long size = 0;
+ public static double doubleArrayCost(long length) {
+ double size = 0;
size += 8; // _values double array reference
size += 20; // double array object header
size += 4; // padding inside double array object to align to 8
bytes.
- size += 8 * length; // Each double fills 8 Bytes
+ size += 8d * length; // Each double fills 8 Bytes
return size;
}
@@ -134,7 +134,7 @@ public class MemoryEstimates {
* @param length The length of the array.
* @return The memory estimate in bytes
*/
- public static long objectArrayCost(int length) {
+ public static long objectArrayCost(long length) {
long size = 0;
size += 8; // reference to array
size += 20; // header
@@ -149,7 +149,7 @@ public class MemoryEstimates {
* @param length The length of the array.
* @return The memory estimate in bytes
*/
- public static long longArrayCost(int length) {
+ public static double longArrayCost(int length) {
return doubleArrayCost(length);
// exactly the same size as a double array
}
diff --git
a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
index 5ae5ed6..a2ff78a 100644
--- a/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
+++ b/src/test/java/org/apache/sysds/test/component/misc/MemoryEstimateTest.java
@@ -80,11 +80,11 @@ public class MemoryEstimateTest {
break;
case INT:
int[] arrayInt = new int[length];
-
assertEquals(MemoryEstimates.intArrayCost(length), measure(arrayInt));
+
assertEquals((long)MemoryEstimates.intArrayCost(length), measure(arrayInt));
break;
case DOUBLE:
double[] arrayDouble = new double[length];
-
assertEquals(MemoryEstimates.doubleArrayCost(length), measure(arrayDouble));
+
assertEquals((long)MemoryEstimates.doubleArrayCost(length),
measure(arrayDouble));
break;
default:
System.out.println(arrayToMeasure.getClass().getSimpleName());
diff --git
a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
index 49b9fc7..ef0b567 100644
---
a/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
+++
b/src/test/java/org/apache/sysds/test/component/sparse/SparseBlockMemEstimate.java
@@ -35,7 +35,7 @@ import org.apache.sysds.test.TestUtils;
public class SparseBlockMemEstimate extends AutomatedTestBase
{
private final static int rows = 662;
- private final static int cols = 444;
+ private final static int cols = 444;
private final static double sparsity1 = 0.39;
private final static double sparsity2 = 0.0001;
@@ -88,7 +88,7 @@ public class SparseBlockMemEstimate extends AutomatedTestBase
if( memMCSR < memCOO )
Assert.fail("SparseBlockMCSR memory estimate
smaller than SparseBlockCOO estimate.");
if( memCSR < memCOO )
- Assert.fail("SparseBlockCSR memory estimate
smaller than SparseBlockCOO estimate.");
+ Assert.fail("SparseBlockCSR memory estimate
smaller than SparseBlockCOO estimate.");
}
}
}
\ No newline at end of file