Repository: incubator-systemml
Updated Branches:
  refs/heads/master 766cc48c0 -> 50d211baa


[SYSTEMML-1140] Performance bufferpool (shallow serialize for sparse)

This patch addresses performance issues due to serialization overhead of
sparse matrices (in MCSR format) on buffer pool write. The basic idea is
an extended shallow serialize for sparse matrices. Shallow serialize
(which simply keeps a strong reference instead of serializing the
matrix) was so far only used for dense matrices and sparse matrices in
CSR format because their in-memory size is equivalent to their
serialized size. For MCSR (our default sparse block), the in-memory
representation has some overhead, so serialization helps to avoid
unnecessary evictions to disk. However, as the number of columns (or nnz
per row) grows, this overhead becomes negligible. Hence, we now use an
overhead threshold of 30% and use a shallow serialize whenever the
overhead is below this threshold.

For example, on a scenario of a 100K x 10K matrix with sparsity 0.1 and
20 iterations of X = X * i, this patch improved the end-to-end runtime
from 65s to 32.7s.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a68648de
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a68648de
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a68648de

Branch: refs/heads/master
Commit: a68648ded00b0dc2510cd16ae8a0e5fa7ae822c3
Parents: 766cc48
Author: Matthias Boehm <[email protected]>
Authored: Fri Jun 2 18:10:28 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jun 3 10:48:30 2017 -0700

----------------------------------------------------------------------
 .../controlprogram/caching/LazyWriteBuffer.java |  7 +++---
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 23 +++++++++++++++-----
 .../runtime/matrix/data/SparseBlockFactory.java | 10 ++++++---
 3 files changed, 28 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a68648de/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java
index 212800d..f0eb926 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/LazyWriteBuffer.java
@@ -60,9 +60,10 @@ public class LazyWriteBuffer
                throws IOException
        {       
                //obtain basic meta data of cache block
-               long lSize = cb.getExactSerializedSize();  
-               boolean requiresWrite = (   lSize > _limit  //global buffer 
limit
-                       || !ByteBuffer.isValidCapacity(lSize, cb) ); //local 
buffer limit
+               long lSize = cb.isShallowSerialize() ?
+                       cb.getInMemorySize() : cb.getExactSerializedSize();
+               boolean requiresWrite = (lSize > _limit        //global buffer 
limit
+                       || !ByteBuffer.isValidCapacity(lSize, cb)); //local 
buffer limit
        
                //handle caching/eviction if it fits in writebuffer
                if( !requiresWrite ) 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a68648de/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 780c98b..e61c6a2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -103,10 +103,13 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        public static final SparseBlock.Type DEFAULT_SPARSEBLOCK = 
SparseBlock.Type.MCSR;
        //default sparse block type for update in place: compressed sparse 
rows, to prevent serialization
        public static final SparseBlock.Type DEFAULT_INPLACE_SPARSEBLOCK = 
SparseBlock.Type.CSR;
+       //allowed overhead for shallow serialize in terms of in-memory-size/x 
<= serialized-size 
+       public static final double MAX_SHALLOW_SERIALIZE_OVERHEAD = 1.3;
        //basic header (int rlen, int clen, byte type)
        public static final int HEADER_SIZE = 9;
        
-       private static final boolean DISPLAY_STATISTICS = false; // Developer 
flag to measure performance overhead of various functions in this class
+       //internal stats flag for matrix block internals //TODO remove
+       private static final boolean DISPLAY_STATISTICS = false; 
        
        public enum BlockType{
                EMPTY_BLOCK,  
@@ -2395,14 +2398,18 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return (long) Math.min(size, Long.MAX_VALUE);
        }
 
-       public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity)
+       public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity) {
+               return estimateSizeSparseInMemory(nrows, ncols, sparsity, 
DEFAULT_SPARSEBLOCK);
+       }
+       
+       public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity, SparseBlock.Type stype)
        {
                // basic variables and references sizes
                double size = 44;
                
                // delegate memory estimate to individual sparse blocks
                size += SparseBlockFactory.estimateSizeSparseInMemory(
-                       DEFAULT_SPARSEBLOCK, nrows, ncols, sparsity);
+                       stype, nrows, ncols, sparsity);
                
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
@@ -2558,8 +2565,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        return 44;
                //in-memory size of dense/sparse representation
                double sp = OptimizerUtils.getSparsity(rlen, clen, nonZeros);
-               return sparse ? estimateSizeSparseInMemory(rlen, clen, sp) : 
-                       estimateSizeDenseInMemory(rlen, clen);
+               return !sparse ? estimateSizeDenseInMemory(rlen, clen) :
+                       estimateSizeSparseInMemory(rlen, clen, sp,
+                       SparseBlockFactory.getSparseBlockType(sparseBlock));
        }
        
        @Override
@@ -2571,7 +2579,10 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        public boolean isShallowSerialize() {
                //shallow serialize if dense, dense in serialized form or 
already in CSR
                return !sparse || !evalSparseFormatOnDisk()
-                       || (sparse && sparseBlock instanceof SparseBlockCSR);
+                       || (sparse && sparseBlock instanceof SparseBlockCSR)
+                       || (sparse && sparseBlock instanceof SparseBlockMCSR
+                               && 
getInMemorySize()/MAX_SHALLOW_SERIALIZE_OVERHEAD 
+                               <= getExactSerializedSize());
        }
        
        @Override

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a68648de/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
index 395ba18..5abd7ba 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
@@ -59,9 +59,13 @@ public abstract class SparseBlockFactory
        }
        
        public static boolean isSparseBlockType(SparseBlock sblock, 
SparseBlock.Type type) {
-               return (sblock instanceof SparseBlockMCSR && type == 
SparseBlock.Type.MCSR)
-                       ||(sblock instanceof SparseBlockCSR && type == 
SparseBlock.Type.CSR)
-                       ||(sblock instanceof SparseBlockCOO && type == 
SparseBlock.Type.COO);
+               return (getSparseBlockType(sblock) == type);
+       }
+       
+       public static SparseBlock.Type getSparseBlockType(SparseBlock sblock) {
+               return (sblock instanceof SparseBlockMCSR) ? 
SparseBlock.Type.MCSR :
+                       (sblock instanceof SparseBlockCSR) ? 
SparseBlock.Type.CSR : 
+                       (sblock instanceof SparseBlockCOO) ? 
SparseBlock.Type.COO : null;
        }
 
        public static long estimateSizeSparseInMemory(SparseBlock.Type type, 
long nrows, long ncols, double sparsity) {

Reply via email to