This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 715374921c [SYSTEMDS-3355] MatrixBlock size estimation w/ CSR awareness
715374921c is described below
commit 715374921c46422bf3a8a6cf96e430484abaeb1f
Author: baunsgaard <[email protected]>
AuthorDate: Sun May 15 17:24:07 2022 +0200
[SYSTEMDS-3355] MatrixBlock size estimation w/ CSR awareness
Closes #1593.
---
.../sysds/runtime/matrix/data/MatrixBlock.java | 60 +++++++++++++---------
1 file changed, 35 insertions(+), 25 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 4d5b97ff3d..315871ef50 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -1102,31 +1102,18 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
* @return true if matrix block should be in sparse format in memory
*/
public boolean evalSparseFormatInMemory() {
- //ensure exact size estimates for write
- if( nonZeros<=0 )
- recomputeNonZeros();
-
- //decide on in-memory representation
- return evalSparseFormatInMemory(rlen, clen, nonZeros);
+ return evalSparseFormatInMemory(false);
}
- @SuppressWarnings("unused")
- private boolean evalSparseFormatInMemory(boolean transpose)
- {
- int lrlen = (transpose) ? clen : rlen;
- int lclen = (transpose) ? rlen : clen;
- long lnonZeros = nonZeros;
-
+ public boolean evalSparseFormatInMemory(boolean allowCSR) {
//ensure exact size estimates for write
- if( lnonZeros<=0 ) {
+ if( nonZeros<=0 )
recomputeNonZeros();
- lnonZeros = nonZeros;
- }
//decide on in-memory representation
- return evalSparseFormatInMemory(lrlen, lclen, lnonZeros);
+ return evalSparseFormatInMemory(rlen, clen, nonZeros, allowCSR);
}
-
+
/**
* Evaluates if this matrix block should be in sparse format on
* disk. This applies to any serialized matrix representation, i.e.,
@@ -1169,7 +1156,7 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
*/
public void examSparsity(boolean allowCSR) {
//determine target representation
- boolean sparseDst = evalSparseFormatInMemory();
+ boolean sparseDst = evalSparseFormatInMemory(allowCSR);
//check for empty blocks (e.g., sparse-sparse)
if( isEmptyBlock(false) ) {
@@ -1198,17 +1185,22 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
* @param nnz number of non-zeros
* @return true if matrix block shold be in sparse format in memory
*/
- public static boolean evalSparseFormatInMemory( final long nrows, final
long ncols, final long nnz )
+ public static boolean evalSparseFormatInMemory(long nrows, long ncols,
long nnz) {
+ return evalSparseFormatInMemory(nrows, ncols, nnz, false);
+ }
+
+ public static boolean evalSparseFormatInMemory(final long nrows,
+ final long ncols, final long nnz, final boolean allowCSR)
{
//evaluate sparsity threshold
double lsparsity = (double)nnz/nrows/ncols;
- boolean lsparse = (lsparsity < SPARSITY_TURN_POINT);
+ boolean lsparse = (lsparsity < SPARSITY_TURN_POINT) && ncols >
1;
//compare size of sparse and dense representation in order to
prevent
//that the sparse size exceed the dense size since we use the
dense size
//as worst-case estimate if unknown (and it requires less io
from
//main memory).
- double sizeSparse = estimateSizeSparseInMemory(nrows, ncols,
lsparsity);
+ double sizeSparse = estimateSizeSparseInMemory(nrows, ncols,
lsparsity, allowCSR);
double sizeDense = estimateSizeDenseInMemory(nrows, ncols);
return lsparse && (sizeSparse<sizeDense);
@@ -1223,8 +1215,7 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
* @param nnz number of non-zeros
* @return true if matrix block shold be in sparse format on disk
*/
- public static boolean evalSparseFormatOnDisk( final long nrows, final
long ncols, final long nnz )
- {
+ public static boolean evalSparseFormatOnDisk( final long nrows, final
long ncols, final long nnz ) {
//evaluate sparsity threshold
double lsparsity = ((double)nnz/nrows)/ncols;
boolean lsparse = (lsparsity < SPARSITY_TURN_POINT);
@@ -1233,7 +1224,7 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
double sizeSparse = estimateSizeSparseOnDisk(nrows, ncols, nnz);
double sizeDense = estimateSizeDenseOnDisk(nrows, ncols);
- return lsparse && (sizeSparse<sizeDense ||
sizeUltraSparse<sizeDense);
+ return lsparse && (sizeSparse<sizeDense ||
sizeUltraSparse<sizeDense);
}
@@ -2588,6 +2579,10 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
return estimateSizeDenseInMemory(nrows, ncols);
}
+ public long estimateSizeDenseInMemory() {
+ return estimateSizeDenseInMemory(rlen, clen);
+ }
+
public static long estimateSizeDenseInMemory(long nrows, long ncols) {
double size = getHeaderSize()
+ DenseBlockFactory.estimateSizeDenseInMemory(nrows,
ncols);
@@ -2595,9 +2590,24 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
return (long) Math.min(size, Long.MAX_VALUE);
}
+ public long estimateSizeSparseInMemory() {
+ return estimateSizeSparseInMemory(rlen, clen, getSparsity());
+ }
+
public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity) {
return estimateSizeSparseInMemory(nrows, ncols, sparsity,
DEFAULT_SPARSEBLOCK);
}
+
+ public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity, boolean allowCSR) {
+ if(allowCSR)
+ return estimateSizeSparseInMemory(nrows, ncols,
sparsity, SparseBlock.Type.CSR);
+ else
+ return estimateSizeSparseInMemory(nrows, ncols,
sparsity, DEFAULT_SPARSEBLOCK);
+ }
+
+ public long estimateSizeSparseInMemory(SparseBlock.Type stype){
+ return estimateSizeSparseInMemory(rlen, clen, getSparsity(),
stype);
+ }
public static long estimateSizeSparseInMemory(long nrows, long ncols,
double sparsity, SparseBlock.Type stype) {
double size = getHeaderSize() + ((sparsity == 0) ? 0 :
//allocated on demand