[SYSTEMML-2046,2049] Enable large dense blocks, incl rand, write, alloc

This patch enables the runtime allocation of large dense blocks (although,
unless forced to single node, the compiler still falls back to distributed
operations) along with proper exception handling to indicate operations
that only work with a single dense block. This now allows end-to-end
experiments without internal changes.

Furthermore, this adds support for large dense blocks in rand operations
and local evictions. Additionally, this includes a fix for the
allocation of large dense blocks, whose physical data size was computed
incorrectly so far.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/642a0063
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/642a0063
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/642a0063

Branch: refs/heads/master
Commit: 642a006387951092361f36913545e9c0b9dc3787
Parents: 6c4cc17
Author: Matthias Boehm <[email protected]>
Authored: Mon Jan 1 21:22:59 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Mon Jan 1 21:22:59 2018 -0800

----------------------------------------------------------------------
 .../runtime/matrix/data/DenseBlockLDRB.java     | 15 +++----
 .../runtime/matrix/data/LibMatrixDatagen.java   | 34 ++++++++-------
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 45 ++++++++++----------
 3 files changed, 47 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/642a0063/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
index 7dfe57f..fec0d60 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
@@ -61,15 +61,15 @@ public class DenseBlockLDRB extends DenseBlock
                int numPart = (int)Math.ceil((double)rlen / blen);
                if( this.blen == blen && llen < capacity() ) {
                        for(int i=0; i<numPart; i++) {
-                               int len = Math.min((i+1)*blen,rlen)-i*blen;
-                               Arrays.fill(data[i], 0, len, v);
+                               int lrlen = 
(int)(Math.min((i+1)*blen,rlen)-i*blen);
+                               Arrays.fill(data[i], 0, lrlen*clen, v);
                        }
                }
                else {
                        data = new double[numPart][];
                        for(int i=0; i<numPart; i++) {
-                               int len = Math.min((i+1)*blen,rlen)-i*blen;
-                               data[i] = new double[len];
+                               int lrlen = 
(int)(Math.min((i+1)*blen,rlen)-i*blen);
+                               data[i] = new double[lrlen*clen];
                                if( v != 0 )
                                        Arrays.fill(data[i], v);
                        }
@@ -120,11 +120,8 @@ public class DenseBlockLDRB extends DenseBlock
        @Override
        public long countNonZeros() {
                long nnz = 0;
-               for(int i=0; i<numBlocks(); i++ ) {
-                       double[] a = valuesAt(i);
-                       for(int j=0; j<a.length; j++)
-                               nnz += (a[j]!=0) ? 1 : 0;
-               }
+               for(int i=0; i<numBlocks(); i++ )
+                       nnz += UtilFunctions.computeNnz(valuesAt(i), 0, 
size(i));
                return nnz;
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/642a0063/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
index d67fb15..611eed3 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
@@ -236,7 +236,7 @@ public class LibMatrixDatagen
                if( out.sparse )
                        out.allocateSparseRowsBlock();
                else
-                       out.allocateDenseBlock();       
+                       out.allocateDenseBlock();
                
                int nrb = (int) Math.ceil((double)rows/rpb);
                int ncb = (int) Math.ceil((double)cols/cpb);
@@ -323,7 +323,7 @@ public class LibMatrixDatagen
                if( out.sparse )
                        out.allocateSparseRowsBlock();
                else
-                       out.allocateDenseBlock();       
+                       out.allocateDenseBlock();
        
                int nrb = (int) Math.ceil((double)rows/rpb);
                int ncb = (int) Math.ceil((double)cols/cpb);
@@ -351,7 +351,7 @@ public class LibMatrixDatagen
                                int cu = parcol ? Math.min((i+1)*blklen, parnb) 
: ncb;
                                long[] lseeds = sliceSeedsForCP(seeds, rl, ru, 
cl, cu, nrb, ncb);
                                tasks.add(new RandTask(rl, ru, cl, cu, out, 
-                                               rgen, lnnzInBlocks, bSeed, 
lseeds) );   
+                                               rgen, lnnzInBlocks, bSeed, 
lseeds) );
                        }
                        List<Future<Object>> ret = pool.invokeAll(tasks);
                        pool.shutdown();
@@ -544,7 +544,7 @@ public class LibMatrixDatagen
                                // Note that, "pdf" parameter applies only to 
cell values and the individual cells 
                                // are always selected uniformly at random.
                                UniformPRNGenerator nnzPRNG = new 
UniformPRNGenerator(seed);
-
+                               
                                // block-level sparsity, which may differ from 
overall sparsity in the matrix.
                                // (e.g., border blocks may fall under skinny 
matrix turn point, in CP this would be 
                                // irrelevant but we need to ensure consistency 
with MR)
@@ -556,17 +556,15 @@ public class LibMatrixDatagen
                                        int ridx=0, cidx=0; // idx translates 
into (ridx, cidx) entry within the block
                                        int skip = -1;
                                        double p = sparsity;
-                               
+                                       
                                        // Prob [k-1 zeros before a nonzero] = 
Prob [k-1 < log(uniform)/log(1-p) < k] = p*(1-p)^(k-1), where p=sparsity
                                        double log1mp = Math.log(1-p);
                                        long blocksize = blockrows*blockcols;
                                        while(idx < blocksize) {
                                                skip = (int) Math.ceil( 
Math.log(nnzPRNG.nextDouble())/log1mp )-1;
                                                idx = idx+skip+1;
-
                                                if ( idx > blocksize)
                                                        break;
-                                               
                                                // translate idx into (r,c) 
within the block
                                                ridx = (idx-1)/blockcols;
                                                cidx = (idx-1)%blockcols;
@@ -577,11 +575,13 @@ public class LibMatrixDatagen
                                }
                                else {
                                        if (sparsity == 1.0) {
-                                               double[] c = 
out.getDenseBlockValues();
-                                               int cix = rowoffset*cols + 
coloffset;
-                                               for(int ii = 0; ii < blockrows; 
ii++, cix+=cols)
+                                               DenseBlock c = 
out.getDenseBlock();
+                                               for(int ii = 0; ii < blockrows; 
ii++) {
+                                                       double[] cvals = 
c.values(rowoffset+ii);
+                                                       int cix = 
c.pos(rowoffset+ii, coloffset);
                                                        for(int jj = 0; jj < 
blockcols; jj++)
-                                                               c[cix+jj] = min 
+ (range * valuePRNG.nextDouble());
+                                                               cvals[cix+jj] = 
min + (range * valuePRNG.nextDouble());
+                                               }
                                        }
                                        else {
                                                if (out.sparse ) {
@@ -605,17 +605,19 @@ public class LibMatrixDatagen
                                                        }
                                                }
                                                else {
-                                                       double[] c = 
out.getDenseBlockValues();
-                                                       int cix = 
rowoffset*cols + coloffset;
-                                                       for(int ii = 0; ii < 
blockrows; ii++, cix+=cols)
+                                                       DenseBlock c = 
out.getDenseBlock();
+                                                       for(int ii = 0; ii < 
blockrows; ii++) {
+                                                               double[] cvals 
= c.values(rowoffset+ii);
+                                                               int cix = 
c.pos(rowoffset+ii, coloffset);
                                                                for(int jj = 0; 
jj < blockcols; jj++)
                                                                        
if(nnzPRNG.nextDouble() <= sparsity)
-                                                                               
c[cix+jj] =  min + (range * valuePRNG.nextDouble());
+                                                                               
cvals[cix+jj] =  min + (range * valuePRNG.nextDouble());
+                                                       }
                                                }
                                        }
                                } // sparse or dense 
                        } // cbj
-               } // rbi        
+               } // rbi
        }
 
        private static void checkMatrixDimensionsAndSparsity(int rows, int 
cols, double sp) 

http://git-wip-us.apache.org/repos/asf/systemml/blob/642a0063/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index b34353f..3f344c6 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -336,20 +336,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return this;
        }
        
-       public boolean allocateDenseBlock(boolean clearNNZ) 
-       {
-               long limit = (long)rlen * clen;
-               
-               //check max size constraint (16GB dense), since java arrays are 
limited to 2^(32-1) elements)
-               if( limit > Integer.MAX_VALUE ) {
-                       String execType = OptimizerUtils.isSparkExecutionMode() 
? "SPARK" : "MR";
-                       throw new RuntimeException("Dense in-memory matrix 
block ("+rlen+"x"+clen+") "
-                               + "exceeds supported size of 
"+Integer.MAX_VALUE+" elements (16GB). "
-                               + "Please, report this issue and reduce the JVM 
heapsize to execute "
-                               + "this operation in "+execType+".");
-               }
-               
+       public boolean allocateDenseBlock(boolean clearNNZ) {
                //allocate block if non-existing or too small (guaranteed to be 
0-initialized),
+               long limit = (long)rlen * clen;
                boolean reset = (denseBlock == null || denseBlock.capacity() < 
limit);
                if( denseBlock == null )
                        denseBlock = DenseBlockFactory.createDenseBlock(rlen, 
clen);
@@ -524,8 +513,13 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        }
        
        public double[] getDenseBlockValues() {
-               return (denseBlock != null) ?
-                       denseBlock.valuesAt(0) : null;
+               //this method is used as a short-hand for all operations that
+               //guaranteed only deal with dense blocks of a single block.
+               if( denseBlock != null && denseBlock.numBlocks() > 1 ) {
+                       throw new RuntimeException("Large dense in-memory block 
(with numblocks="+denseBlock.numBlocks()+") "
+                               + "allocated but operation access to first 
block only, which might cause incorrect results.");
+               }
+               return (denseBlock != null) ? denseBlock.valuesAt(0) : null;
        }
        
        public SparseBlock getSparseBlock() {
@@ -2002,13 +1996,20 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        {
                out.writeByte( BlockType.DENSE_BLOCK.ordinal() );
                
-               int limit=rlen*clen;
-               double[] a = getDenseBlockValues();
-               if( out instanceof MatrixBlockDataOutput ) //fast serialize
-                       ((MatrixBlockDataOutput)out).writeDoubleArray(limit, a);
-               else //general case (if fast serialize not supported)
-                       for(int i=0; i<limit; i++)
-                               out.writeDouble(a[i]);
+               DenseBlock a = getDenseBlock();
+               if( out instanceof MatrixBlockDataOutput ) { //fast serialize
+                       MatrixBlockDataOutput mout = (MatrixBlockDataOutput)out;
+                       for(int i=0; i<a.numBlocks(); i++)
+                               mout.writeDoubleArray(a.size(i), a.valuesAt(i));
+               }
+               else { //general case (if fast serialize not supported)
+                       for(int i=0; i<a.numBlocks(); i++) {
+                               double[] avals = a.values(i);
+                               int limit = a.size(i);
+                               for(int j=0; j<limit; j++)
+                                       out.writeDouble(avals[j]);
+                       }
+               }
        }
 
        private void writeSparseBlock(DataOutput out) 

Reply via email to