Repository: systemml
Updated Branches:
  refs/heads/master 7b56f6131 -> 0c3a46ba1


[SYSTEMML-2046] Large dense blocks in left/right indexing operations

This patch modifies all left/right indexing operations involving dense
blocks to properly handle the new dense block abstraction with
potentially large blocks. Since left indexing relies on the copy
primitives, this modifications also applies to all other code paths that
leverage these primitives, such as all readers from HDFS and RDDs.

Additionally, this patch also includes (1) extensions to the dense block
abstraction, (2) performance improvements for block fill operations, (3)
a fix for fill operations on large blocks, (4) fixes for full aggregates
over large blocks, (5) a fix for the selection of large sparse update
inplace blocks, and (6) a cleanup of misleading statistics maintenance
in leftindexing operations.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0c3a46ba
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0c3a46ba
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0c3a46ba

Branch: refs/heads/master
Commit: 0c3a46ba12d50c220ec29fd5b301f2f23b463aee
Parents: 7b56f61
Author: Matthias Boehm <[email protected]>
Authored: Fri Dec 29 22:49:46 2017 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Fri Dec 29 22:49:46 2017 -0800

----------------------------------------------------------------------
 .../cp/MatrixIndexingCPInstruction.java         |  13 +-
 .../sysml/runtime/matrix/data/DenseBlock.java   |  13 +
 .../runtime/matrix/data/DenseBlockDRB.java      |  19 +-
 .../runtime/matrix/data/DenseBlockLDRB.java     |  24 +-
 .../sysml/runtime/matrix/data/LibMatrixAgg.java |  12 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 340 ++++++++-----------
 6 files changed, 201 insertions(+), 220 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java
index 4a636e1..73f7f72 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java
@@ -85,8 +85,7 @@ public final class MatrixIndexingCPInstruction extends 
IndexingCPInstruction {
                else if ( opcode.equalsIgnoreCase(LeftIndex.OPCODE))
                {
                        UpdateType updateType = mo.getUpdateType();
-                       if(DMLScript.STATISTICS)
-                       {
+                       if(DMLScript.STATISTICS) {
                                if( updateType.isInPlace() )
                                        Statistics.incrementTotalLixUIP();
                                Statistics.incrementTotalLix();
@@ -95,19 +94,17 @@ public final class MatrixIndexingCPInstruction extends 
IndexingCPInstruction {
                        MatrixBlock matBlock = 
ec.getMatrixInput(input1.getName(), getExtendedOpcode());
                        MatrixBlock resultBlock = null;
                        
-                       if(input2.getDataType() == DataType.MATRIX) 
//MATRIX<-MATRIX
-                       {
+                       if(input2.getDataType() == DataType.MATRIX) { 
//MATRIX<-MATRIX
                                MatrixBlock rhsMatBlock = 
ec.getMatrixInput(input2.getName(), getExtendedOpcode());
-                               resultBlock = 
matBlock.leftIndexingOperations(rhsMatBlock, ixrange, new MatrixBlock(), 
updateType, getExtendedOpcode());
+                               resultBlock = 
matBlock.leftIndexingOperations(rhsMatBlock, ixrange, new MatrixBlock(), 
updateType);
                                ec.releaseMatrixInput(input2.getName(), 
getExtendedOpcode());
                        }
-                       else //MATRIX<-SCALAR 
-                       {
+                       else { //MATRIX<-SCALAR 
                                if(!ixrange.isScalar())
                                        throw new DMLRuntimeException("Invalid 
index range of scalar leftindexing: "+ixrange.toString()+"." );
                                ScalarObject scalar = 
ec.getScalarInput(input2.getName(), ValueType.DOUBLE, input2.isLiteral());
                                resultBlock = (MatrixBlock) 
matBlock.leftIndexingOperations(scalar, 
-                                               (int)ixrange.rowStart, 
(int)ixrange.colStart, new MatrixBlock(), updateType);
+                                       (int)ixrange.rowStart, 
(int)ixrange.colStart, new MatrixBlock(), updateType);
                        }
 
                        //unpin lhs input

http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
index cf3f6db..e2177e2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
@@ -238,6 +238,19 @@ public abstract class DenseBlock implements Serializable
        public abstract void set(DenseBlock db);
        
        /**
+        * Copy the given dense block into the specified
+        * index range.
+        * 
+        * @param rl row lower index
+        * @param ru row upper index (exclusive)
+        * @param cl column lower index 
+        * @param cu column upper index (exclusive)
+        * @param db dense block
+        */
+       public abstract void set(int rl, int ru, int cl, int cu, DenseBlock db);
+       
+       
+       /**
         * Copy the given kahan object sum and correction.
         * 
         * @param kbuff kahan object

http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
index ae526eb..f1520c0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
@@ -162,8 +162,11 @@ public class DenseBlockDRB extends DenseBlock
        
        @Override
        public void set(int rl, int ru, int cl, int cu, double v) {
-               for(int i=rl, ix=rl*clen; i<ru; i++, ix+=clen)
-                       Arrays.fill(data, ix+cl, ix+cu, v);
+               if( cl==0 && cu == clen )
+                       Arrays.fill(data, rl*clen, ru*clen, v);
+               else
+                       for(int i=rl, ix=rl*clen; i<ru; i++, ix+=clen)
+                               Arrays.fill(data, ix+cl, ix+cu, v);
        }
 
        @Override
@@ -175,6 +178,18 @@ public class DenseBlockDRB extends DenseBlock
        public void set(DenseBlock db) {
                System.arraycopy(db.valuesAt(0), 0, data, 0, rlen*clen);
        }
+       
+       @Override
+       public void set(int rl, int ru, int cl, int cu, DenseBlock db) {
+               double[] a = db.valuesAt(0);
+               if( cl == 0 && cu == clen)
+                       System.arraycopy(a, 0, data, rl*clen+cl, 
(int)db.size());
+               else {
+                       int len = cu - cl;
+                       for(int i=rl, ix1=0, ix2=rl*clen+cl; i<ru; i++, 
ix1+=len, ix2+=clen)
+                               System.arraycopy(a, ix1, data, ix2, len);
+               }
+       }
 
        @Override
        public void set(int r, double[] v) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
index cd25516..ff3357b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
@@ -177,11 +177,17 @@ public class DenseBlockLDRB extends DenseBlock
        
        @Override
        public void set(int rl, int ru, int cl, int cu, double v) {
-               for(int bi=index(rl); bi<=index(ru); bi++) {
-                       int imin = pos(bi);
-                       int imax = Math.min(ru, (bi+1)*blen)-bi;
-                       for(int i=imin, ix=imin*clen; i<imax; i++, ix+=clen)
-                               Arrays.fill(data, ix+cl, ix+cu, v);
+               boolean rowBlock = (cl == 0 && cu == clen);
+               final int bil = index(rl);
+               final int biu = index(ru-1);
+               for(int bi=bil; bi<=biu; bi++) {
+                       int lpos = (bi==bil) ? pos(rl) : 0;
+                       int len = (bi==biu) ? pos(ru-1)-lpos+clen : 
blockSize(bi)*clen;
+                       if( rowBlock )
+                               Arrays.fill(data[bi], lpos, lpos+len, v);
+                       else
+                               for(int i=lpos; i<lpos+len; i+=clen)
+                                       Arrays.fill(data[bi], i+cl, i+cu, v);
                }
        }
 
@@ -200,6 +206,14 @@ public class DenseBlockLDRB extends DenseBlock
                for(int bi=0; bi<numBlocks(); bi++)
                        System.arraycopy(db.valuesAt(bi), 0, data[bi], 0, 
size(bi));
        }
+       
+       @Override
+       public void set(int rl, int ru, int cl, int cu, DenseBlock db) {
+               for(int i=rl; i<ru; i++) {
+                       System.arraycopy(db.values(i-rl),
+                               db.pos(i-rl), values(i), pos(i, cl), cu-cl);
+               }
+       }
 
        @Override
        public double get(int r, int c) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index 3602534..b56397e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -1639,7 +1639,7 @@ public class LibMatrixAgg
                final int biu = a.index(ru-1);
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        sum(a.valuesAt(bi), lpos, len, kbuff, kplus);
                }
                c.set(kbuff);
@@ -1701,7 +1701,7 @@ public class LibMatrixAgg
                final int biu = a.index(ru-1);
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        sum(a.valuesAt(bi), lpos, len, kbuff, kplusSq);
                }
                c.set(kbuff);
@@ -1859,7 +1859,7 @@ public class LibMatrixAgg
                final int biu = a.index(ru-1);
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        tmp = builtin(a.valuesAt(bi), lpos, tmp, len, builtin);
                }
                c.set(0, 0, tmp);
@@ -1957,7 +1957,7 @@ public class LibMatrixAgg
                int tlen = 0;
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        mean(a.valuesAt(bi), lpos, len, 0, kbuff, kmean);
                        tlen += len;
                }
@@ -2027,7 +2027,7 @@ public class LibMatrixAgg
                final int biu = a.index(ru-1);
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        var(a.valuesAt(bi), lpos, len, cbuff, cm);
                }
                // store results: { var | mean, count, m2 correction, mean 
correction }
@@ -2109,7 +2109,7 @@ public class LibMatrixAgg
                double tmp = 1;
                for(int bi=bil; bi<=biu; bi++) {
                        int lpos = (bi==bil) ? a.pos(rl) : 0;
-                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi);
+                       int len = (bi==biu) ? a.pos(ru-1)-lpos+n : 
a.blockSize(bi)*n;
                        tmp *= product( a.valuesAt(bi), lpos, len );
                }
                c.set(0, 0, tmp);

http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index caf024e..cbeef5e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -35,7 +35,6 @@ import java.util.stream.LongStream;
 
 import org.apache.commons.math3.random.Well1024a;
 import org.apache.hadoop.io.DataInputBuffer;
-import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.lops.MMTSJ.MMTSJType;
@@ -61,7 +60,6 @@ import org.apache.sysml.runtime.functionobjects.RevIndex;
 import org.apache.sysml.runtime.functionobjects.SortIndex;
 import org.apache.sysml.runtime.functionobjects.SwapIndex;
 import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
-import org.apache.sysml.runtime.instructions.cp.CPInstruction;
 import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
@@ -87,11 +85,9 @@ import 
org.apache.sysml.runtime.util.FastBufferedDataInputStream;
 import org.apache.sysml.runtime.util.FastBufferedDataOutputStream;
 import org.apache.sysml.runtime.util.IndexRange;
 import org.apache.sysml.runtime.util.UtilFunctions;
-import org.apache.sysml.utils.GPUStatistics;
 import org.apache.sysml.utils.NativeHelper;
 
 
-
 public class MatrixBlock extends MatrixValue implements CacheBlock, 
Externalizable
 {
        private static final long serialVersionUID = 7319972089143154056L;
@@ -1320,19 +1316,17 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return this;
        }
        
-       private void copySparseToSparse(MatrixBlock that)
-       {
+       private void copySparseToSparse(MatrixBlock that) {
                this.nonZeros=that.nonZeros;
                if( that.isEmptyBlock(false) ) {
                        resetSparse();
                        return;
                }
-       
+               
                allocateSparseRowsBlock(false);
-               for(int i=0; i<Math.min(that.sparseBlock.numRows(), rlen); i++)
-               {
+               for(int i=0; i<Math.min(that.sparseBlock.numRows(), rlen); i++) 
{
                        if(!that.sparseBlock.isEmpty(i)) {
-                               sparseBlock.set(i, that.sparseBlock.get(i), 
true);                              
+                               sparseBlock.set(i, that.sparseBlock.get(i), 
true);
                        }
                        else if(!this.sparseBlock.isEmpty(i)) {
                                this.sparseBlock.reset(i,estimatedNNzsPerRow, 
clen);
@@ -1340,8 +1334,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                }
        }
        
-       private void copyDenseToDense(MatrixBlock that)
-       {
+       private void copyDenseToDense(MatrixBlock that) {
                nonZeros = that.nonZeros;
                
                //plain reset to 0 for empty input
@@ -1351,15 +1344,12 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        return;
                }
                
-               //allocate and init dense block (w/o overwriting nnz)
+               //allocate and copy dense block
                allocateDenseBlock(false);
-               
-               //actual copy 
-               System.arraycopy(that.getDenseBlockValues(), 0, 
getDenseBlockValues(), 0, rlen*clen);
+               denseBlock.set(that.denseBlock);
        }
        
-       private void copySparseToDense(MatrixBlock that)
-       {
+       private void copySparseToDense(MatrixBlock that) {
                this.nonZeros=that.nonZeros;
                if( that.isEmptyBlock(false) ) {
                        if(denseBlock!=null)
@@ -1369,41 +1359,41 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                
                //allocate and init dense block (w/o overwriting nnz)
                allocateDenseBlock(false);
-               
-               int start=0;
-               double[] c = getDenseBlockValues();
-               for(int r=0; r<Math.min(that.sparseBlock.numRows(), rlen); r++, 
start+=clen) {
-                       if(that.sparseBlock.isEmpty(r)) 
-                               continue;
-                       int pos = that.sparseBlock.pos(r);
-                       int len = that.sparseBlock.size(r);
-                       int[] aix = that.sparseBlock.indexes(r);
-                       double[] avals = that.sparseBlock.values(r);
-                       for(int i=pos; i<pos+len; i++)
-                               c[start+aix[i]]=avals[i];
+               SparseBlock a = that.getSparseBlock();
+               DenseBlock c = getDenseBlock();
+               for( int i=0; i<Math.min(a.numRows(), rlen); i++ ) {
+                       if( a.isEmpty(i) ) continue;
+                       int pos = a.pos(i);
+                       int len = a.size(i);
+                       int[] aix = a.indexes(i);
+                       double[] avals = a.values(i);
+                       double[] cvals = c.values(i);
+                       int cix = c.pos(i);
+                       for( int j=pos; j<pos+len; j++ )
+                               cvals[cix+aix[j]] = avals[j];
                }
        }
        
-       private void copyDenseToSparse(MatrixBlock that)
-       {
+       private void copyDenseToSparse(MatrixBlock that) {
                nonZeros = that.nonZeros;
                if( that.isEmptyBlock(false) ) {
                        resetSparse();
                        return;
                }
                
-               allocateSparseRowsBlock(false);
-       
-               double[] b = that.getDenseBlockValues();
-               for(int i=0, ix=0; i<rlen; i++) {
-                       sparseBlock.reset(i, estimatedNNzsPerRow, clen);
-                       for(int j=0; j<clen; j++) {
-                               double val = b[ix++];
-                               if( val != 0 ) {
-                                       //create sparse row only if required
-                                       sparseBlock.allocate(i, 
estimatedNNzsPerRow, clen);
-                                       sparseBlock.append(i, j, val);
-                               }
+               if( !allocateSparseRowsBlock(false) )
+                       resetSparse();
+               DenseBlock a = that.getDenseBlock();
+               SparseBlock c = getSparseBlock();
+               for(int i=0; i<rlen; i++) {
+                       double[] avals = a.values(i);
+                       int aix = a.pos(i);
+                       for( int j=0; j<clen; j++ ) {
+                               double val = avals[aix+j];
+                               if( val == 0 ) continue;
+                               //create sparse row only if required
+                               c.allocate(i, estimatedNNzsPerRow, clen);
+                               c.append(i, j, val);
                        }
                }
        }
@@ -1440,13 +1430,12 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        }
 
        private void copySparseToSparse(int rl, int ru, int cl, int cu, 
MatrixBlock src, boolean awareDestNZ)
-       {       
+       {
                //handle empty src and dest
-               if( src.isEmptyBlock(false) )
-               {
+               if( src.isEmptyBlock(false) ) {
                        if( awareDestNZ && sparseBlock != null )
                                copyEmptyToSparse(rl, ru, cl, cu, true);
-                       return;         
+                       return;
                }
                
                if(sparseBlock==null)
@@ -1468,14 +1457,12 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                int apos = a.pos(i); 
                                int alen = a.size(i);
                                int[] aix = a.indexes(i);
-                               double[] avals = a.values(i);           
+                               double[] avals = a.values(i);
                                
-                               if( b.isEmpty(rl+i)  )
-                               {
+                               if( b.isEmpty(rl+i) ) {
                                        b.allocate(rl+i, estimatedNNzsPerRow, 
clen);
                                        for( int j=apos; j<apos+alen; j++ )
                                                b.append(rl+i, cl+aix[j], 
avals[j]);
-                                       
                                        if( awareDestNZ )
                                                nonZeros += b.size(rl+i);
                                }
@@ -1492,13 +1479,13 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                                        b.set(rl+i, cl+aix[j], 
avals[j]);
                                        }
                                        nonZeros += (b.size(rl+i) - lnnz);
-                               }       
+                               }
                                else //general case (w/o awareness NNZ)
-                               {               
-                                       //TODO perf sparse row                  
                
+                               {
+                                       //TODO perf sparse row
                                        for( int j=apos; j<apos+alen; j++ )
                                                b.set(rl+i, cl+aix[j], 
avals[j]);
-                               }                               
+                               }
                        }
                }
        }
@@ -1510,30 +1497,31 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                if( src.isEmptyBlock(false) ) {
                        if( awareDestNZ && denseBlock != null ) {
                                nonZeros -= recomputeNonZeros(rl, ru, cl, cu);
-                               copyEmptyToDense(rl, ru, cl, cu);
+                               denseBlock.set(rl, ru+1, cl, cu+1, 0);
                        }
-                       return;         
+                       return;
                }
                if(denseBlock==null)
                        allocateDenseBlock();
                else if( awareDestNZ ) {
                        nonZeros -= recomputeNonZeros(rl, ru, cl, cu);
-                       copyEmptyToDense(rl, ru, cl, cu);
+                       denseBlock.set(rl, ru+1, cl, cu+1, 0);
                }
-
+               
                //copy values
                SparseBlock a = src.sparseBlock;
-               double[] c = getDenseBlockValues();
-               for( int i=0, ix=rl*clen; i<src.rlen; i++, ix+=clen ) {
+               DenseBlock c = getDenseBlock();
+               for( int i=0; i<src.rlen; i++ ) {
                        if( a.isEmpty(i) ) continue;
                        int apos = a.pos(i);
                        int alen = a.size(i);
                        int[] aix = a.indexes(i);
                        double[] avals = a.values(i);
+                       double[] cvals = c.values(rl+i);
+                       int cix = c.pos(rl+i, cl);
                        for( int j=apos; j<apos+alen; j++ )
-                               c[ix+cl+aix[j]] = avals[j];
-                       if(awareDestNZ)
-                               nonZeros += alen;
+                               cvals[cix+aix[j]] = avals[j];
+                       nonZeros += awareDestNZ ? alen : 0;
                }
        }
 
@@ -1551,22 +1539,24 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                allocateSparseRowsBlock(false);
                
                //copy values
-               SparseBlock c = sparseBlock;
-               double[] a = src.getDenseBlockValues();
-               for( int i=0, ix=0; i<src.rlen; i++, ix+=src.clen )
+               DenseBlock a = src.getDenseBlock();
+               SparseBlock c = getSparseBlock();
+               for( int i=0; i<src.rlen; i++ )
                {
                        int rix = rl + i;
+                       double[] avals = a.values(i);
+                       int aix = a.pos(i);
                        if( c instanceof SparseBlockMCSR 
                                && c.isEmpty(rix) ) //special case MCSR append
                        {
                                //count nnz per row (fits likely in L1 cache)
-                               int lnnz = UtilFunctions.computeNnz(a, ix, 
src.clen);
+                               int lnnz = UtilFunctions.computeNnz(avals, aix, 
src.clen);
                                
                                //allocate row once and copy values
                                if( lnnz > 0 ) {
                                        c.allocate(rix, lnnz);
                                        for( int j=0; j<src.clen; j++ ) {
-                                               double val = a[ix+j];
+                                               double val = avals[aix+j];
                                                if( val != 0 )
                                                        c.append(rix, cl+j, 
val); 
                                        }
@@ -1574,22 +1564,20 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                                nonZeros += lnnz;
                                }
                        }
-                       else if( awareDestNZ ) //general case (w/ awareness NNZ)
-                       {
+                       else if( awareDestNZ ) { //general case (w/ awareness 
NNZ)
                                int lnnz = c.size(rix);
                                if( cl==cu ) {
-                                       double val = a[ix];
+                                       double val = avals[aix];
                                        c.set(rix, cl, val);
                                }
                                else {
-                                       c.setIndexRange(rix, cl, cu+1, a, ix, 
src.clen);
+                                       c.setIndexRange(rix, cl, cu+1, avals, 
aix, src.clen);
                                }
                                nonZeros += (c.size(rix) - lnnz);
-                       }       
-                       else //general case (w/o awareness NNZ)
-                       {
+                       }
+                       else { //general case (w/o awareness NNZ)
                                for( int j=0; j<src.clen; j++ ) {
-                                       double val = a[ix+j];
+                                       double val = avals[aix+j];
                                        if( val != 0 ) 
                                                c.set(rix, cl+j, val);
                                }
@@ -1604,7 +1592,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                if( src.isEmptyBlock(false) ) {
                        if( awareDestNZ && denseBlock != null ) {
                                nonZeros -= recomputeNonZeros(rl, ru, cl, cu);
-                               copyEmptyToDense(rl, ru, cl, cu);
+                               denseBlock.set(rl, ru+1, cl, cu+1, 0);
                        }
                        return;
                }
@@ -1617,23 +1605,14 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        nonZeros = nonZeros - recomputeNonZeros(rl, ru, cl, cu) 
+ src.nonZeros;
                
                //copy values
-               double[] a = src.getDenseBlockValues();
-               double[] c = getDenseBlockValues();
-               int rowLen = cu-cl+1;
-               if(clen == src.clen) //optimization for equal width
-                       System.arraycopy(a, 0, c, rl*clen+cl, 
src.rlen*src.clen);
-               else
-                       for( int i=0, ix1=0, ix2=rl*clen+cl; i<src.rlen; i++, 
ix1+=src.clen, ix2+=clen ) {
-                               System.arraycopy(a, ix1, c, ix2, rowLen);
-                       }
+               DenseBlock a = src.getDenseBlock();
+               DenseBlock c = getDenseBlock();
+               c.set(rl, ru+1, cl, cu+1, a);
        }
        
-       private void copyEmptyToSparse(int rl, int ru, int cl, int cu, boolean 
updateNNZ ) 
-       {
+       private void copyEmptyToSparse(int rl, int ru, int cl, int cu, boolean 
updateNNZ ) {
                SparseBlock a = sparseBlock;
-               
-               if( cl==cu ) //specific case: column vector
-               {
+               if( cl==cu ) { //specific case: column vector
                        for( int i=rl; i<=ru; i++ )
                                if( !a.isEmpty(i) ) {
                                        boolean update = a.set(i, cl, 0);
@@ -1641,8 +1620,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                                nonZeros -= update ? 1 : 0;
                                }
                }
-               else
-               {
+               else {
                        for( int i=rl; i<=ru; i++ )
                                if( !a.isEmpty(i) ) {
                                        int lnnz = a.size(i);
@@ -1652,20 +1630,8 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                }
                }
        }
-       
-       private void copyEmptyToDense(int rl, int ru, int cl, int cu) {
-               int rowLen = cu-cl+1;
-               double[] a = getDenseBlockValues();
-               if(clen == rowLen) //optimization for equal width
-                       Arrays.fill(a, rl*clen+cl, ru*clen+cu+1, 0);
-               else
-                       for( int i=rl, ix2=rl*clen+cl; i<=ru; i++, ix2+=clen )
-                               Arrays.fill(a, ix2, ix2+rowLen, 0);
-       }
 
-       public void merge(CacheBlock that, boolean appendOnly) 
-                       throws DMLRuntimeException
-       {
+       public void merge(CacheBlock that, boolean appendOnly) throws 
DMLRuntimeException {
                merge((MatrixBlock)that, appendOnly);
        }
 
@@ -2540,8 +2506,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return est;
        }
        
-       private boolean estimateSparsityOnSlice(int selectRlen, int selectClen, 
int finalRlen, int finalClen)
-       {
+       private boolean estimateSparsityOnSlice(int selectRlen, int selectClen, 
int finalRlen, int finalClen) {
                long ennz = 
(long)((double)nonZeros/rlen/clen*selectRlen*selectClen);
                return evalSparseFormatInMemory(finalRlen, finalClen, ennz); 
        }
@@ -2554,6 +2519,12 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return evalSparseFormatInMemory(rlenm1, clenm1, ennz);
        }
        
+       private boolean requiresInplaceSparseBlockOnLeftIndexing(boolean 
sparse, UpdateType update, long nnz) {
+               return sparse && update != UpdateType.INPLACE_PINNED
+                       && !isShallowSerialize() && (nnz <= Integer.MAX_VALUE
+                               || 
DEFAULT_INPLACE_SPARSEBLOCK==SparseBlock.Type.MCSR);
+       }
+       
        private static boolean estimateSparsityOnGroupedAgg( long rlen, long 
groups ) {
                long ennz = Math.min(groups, rlen);
                return evalSparseFormatInMemory(groups, 1, ennz);
@@ -3521,28 +3492,18 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2, 
k);
                else
                        LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2);
-
        }
        
-       public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, 
IndexRange ixrange, MatrixBlock ret, UpdateType update) throws 
DMLRuntimeException {
-               return leftIndexingOperations(rhsMatrix, ixrange, ret, update, 
null);
-       }
-
-       public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, 
IndexRange ixrange, MatrixBlock ret, UpdateType update, String opcode) 
-               throws DMLRuntimeException 
+       public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix,
+                       IndexRange ixrange, MatrixBlock ret, UpdateType update)
+               throws DMLRuntimeException
        {
-               return leftIndexingOperations(
-                       rhsMatrix, (int)ixrange.rowStart, (int)ixrange.rowEnd, 
-                       (int)ixrange.colStart, (int)ixrange.colEnd, ret, 
update, opcode);
-       }
-       
-       public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int 
rl, int ru, 
-                       int cl, int cu, MatrixBlock ret, UpdateType update) 
throws DMLRuntimeException {
-               return leftIndexingOperations(rhsMatrix, rl, ru, cl, cu, ret, 
update, null);
+               return leftIndexingOperations(rhsMatrix, (int)ixrange.rowStart,
+                       (int)ixrange.rowEnd, (int)ixrange.colStart, 
(int)ixrange.colEnd, ret, update);
        }
-       
-       public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int 
rl, int ru, 
-                       int cl, int cu, MatrixBlock ret, UpdateType update, 
String opcode) 
+
+       public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix,
+                       int rl, int ru, int cl, int cu, MatrixBlock ret, 
UpdateType update)
                throws DMLRuntimeException 
        {       
                // Check the validity of bounds
@@ -3560,22 +3521,19 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                }
                
                MatrixBlock result = ret;
-               boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, 
nonZeros, 
+               boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, 
nonZeros,
                        rhsMatrix.getNumRows(), rhsMatrix.getNumColumns(), 
rhsMatrix.getNonZeros());
                
-               if( !update.isInPlace() ) //general case
-               {
+               if( !update.isInPlace() ) { //general case
                        if(result==null)
                                result=new MatrixBlock(rlen, clen, sp);
                        else
                                result.reset(rlen, clen, sp);
                        result.copy(this, sp);
                }
-               else //update in-place
-               {
+               else { //update in-place
                        //use current block as in-place result
                        result = this;
-                       
                        //ensure that the current block adheres to the sparsity 
estimate
                        //and thus implicitly the memory budget used by the 
compiler
                        if( result.sparse && !sp )
@@ -3584,10 +3542,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                result.denseToSparse();
                        
                        //ensure right sparse block representation to prevent 
serialization
-                       if( result.sparse && update != 
UpdateType.INPLACE_PINNED ) {
+                       if( 
requiresInplaceSparseBlockOnLeftIndexing(result.sparse, update, 
result.nonZeros+rhsMatrix.nonZeros) )
                                result.sparseBlock = 
SparseBlockFactory.copySparseBlock(
-                                               DEFAULT_INPLACE_SPARSEBLOCK, 
result.sparseBlock, false);
-                       }
+                                       DEFAULT_INPLACE_SPARSEBLOCK, 
result.sparseBlock, false);
                }
                
                //NOTE conceptually we could directly use a zeroout and 
copy(..., false) but
@@ -3602,33 +3559,25 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        result.quickSetValue(rl, cl, src.quickGetValue(0, 0));
                }
                else { //general case
-                       long t1 = opcode != null && DMLScript.STATISTICS && 
DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
-                       boolean isCSRCopy = false;
                        //handle csr sparse blocks separately to avoid repeated 
shifting on column-wise access
                        if( !result.isEmptyBlock(false) && result.sparse && 
result.sparseBlock instanceof SparseBlockCSR ) {
                                SparseBlockCSR sblock = (SparseBlockCSR) 
result.sparseBlock;
-                               if( src.sparse )
+                               if( src.sparse || src.isEmptyBlock(false) )
                                        sblock.setIndexRange(rl, ru+1, cl, 
cu+1, src.getSparseBlock());
                                else { //dense
-                                       double[] data = 
(src.getDenseBlock()!=null) ?
-                                               src.getDenseBlockValues() : 
null;
-                                       sblock.setIndexRange(rl, ru+1, cl, 
cu+1, data,
-                                               0, 
src.getNumRows()*src.getNumColumns());
+                                       for(int bi=0; 
bi<src.denseBlock.numBlocks(); bi++) {
+                                               int rpos = bi * 
src.denseBlock.blockSize();
+                                               int blen = 
src.denseBlock.blockSize(bi);
+                                               sblock.setIndexRange(rl+rpos, 
rl+rpos+blen, cl, cu+1,
+                                                       
src.denseBlock.valuesAt(bi), 0, src.rlen*src.clen);
+                                       }
                                }
                                result.nonZeros = sblock.size();
-                               isCSRCopy = true;
                        }
                        //copy submatrix into result
                        else {
                                result.copy(rl, ru, cl, cu, src, true);
                        }
-                       if(opcode != null && DMLScript.STATISTICS && 
DMLScript.FINEGRAINED_STATISTICS) {
-                               long t2 = System.nanoTime();
-                               if(isCSRCopy)
-                                       
GPUStatistics.maintainCPMiscTimes(opcode, 
CPInstruction.MISC_TIMER_CSR_LIX_COPY, t2-t1);
-                               else
-                                       
GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_LIX_COPY, 
t2-t1);
-                       }
                }
 
                return result;
@@ -3649,36 +3598,33 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
         * @return matrix block
         * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, 
int cl, MatrixBlock ret, UpdateType update) 
+       public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, 
int cl, MatrixBlock ret, UpdateType update)
                throws DMLRuntimeException 
        {
                double inVal = scalar.getDoubleValue();
                boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, 
nonZeros, 1, 1, (inVal!=0)?1:0);
                
-               if( !update.isInPlace() ) //general case
-               {
+               if( !update.isInPlace() ) { //general case
                        if(ret==null)
                                ret=new MatrixBlock(rlen, clen, sp);
                        else
                                ret.reset(rlen, clen, sp);
                        ret.copy(this, sp);
-                       
                }
-               else //update in-place
-               {
+               else { //update in-place
                        //use current block as in-place result
                        ret = this;
-                       
                        //ensure right sparse block representation to prevent 
serialization
-                       if( ret.sparse && update != UpdateType.INPLACE_PINNED ) 
{
+                       if( 
requiresInplaceSparseBlockOnLeftIndexing(ret.sparse, update, ret.nonZeros+1) )
                                ret.sparseBlock = 
SparseBlockFactory.copySparseBlock(
-                                               DEFAULT_INPLACE_SPARSEBLOCK, 
ret.sparseBlock, false);
-                       }
+                                       DEFAULT_INPLACE_SPARSEBLOCK, 
ret.sparseBlock, false);
                }
                
                ret.quickSetValue(rl, cl, inVal);
                return ret;
        }
+       
+       
 
        public MatrixBlock sliceOperations(IndexRange ixrange, MatrixBlock ret) 
throws DMLRuntimeException {
                return sliceOperations(
@@ -3727,7 +3673,10 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                // actual slice operation
                if( rl==0 && ru==rlen-1 && cl==0 && cu==clen-1 ) {
                        // copy if entire matrix required
-                       result.copy( this );
+                       if( deep )
+                               result.copy( this );
+                       else
+                               result = this;
                }
                else //general case
                {
@@ -3750,7 +3699,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                
                if( cl==cu ) //COLUMN VECTOR 
                {
-                       //note: always dense dest
+                       //note: always dense single-block dest
                        dest.allocateDenseBlock();
                        double[] c = dest.getDenseBlockValues();
                        for( int i=rl; i<=ru; i++ ) {
@@ -3772,18 +3721,18 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                }
                else //general case (sparse/dense dest)
                {
-                       for(int i=rl; i <= ru; i++) 
-                               if( !sparseBlock.isEmpty(i)) 
-                               {
-                                       int apos = sparseBlock.pos(i);
-                                       int alen = sparseBlock.size(i);
-                                       int[] aix = sparseBlock.indexes(i);
-                                       double[] avals = sparseBlock.values(i);
-                                       int astart = 
(cl>0)?sparseBlock.posFIndexGTE(i, cl) : 0;
-                                       if( astart != -1 )
-                                               for( int j=apos+astart; 
j<apos+alen && aix[j] <= cu; j++ )
-                                                       dest.appendValue(i-rl, 
aix[j]-cl, avals[j]);
-                               }
+                       SparseBlock sblock = sparseBlock;
+                       for(int i=rl; i <= ru; i++) {
+                               if( sblock.isEmpty(i) ) continue;
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               int astart = (cl>0)?sblock.posFIndexGTE(i, cl) 
: 0;
+                               if( astart != -1 )
+                                       for( int j=apos+astart; j<apos+alen && 
aix[j] <= cu; j++ )
+                                               dest.appendValue(i-rl, 
aix[j]-cl, avals[j]);
+                       }
                }
        }
 
@@ -3796,32 +3745,25 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                dest.allocateDenseBlock();
 
                //indexing operation
-               if( cl==cu ) //COLUMN INDEXING
-               {
-                       if( clen==1 ) //vector -> vector
-                       {
-                               System.arraycopy(getDenseBlockValues(), rl, 
dest.getDenseBlockValues(), 0, ru-rl+1);
+               if( cl==cu ) { //COLUMN INDEXING
+                       //note: output always single block
+                       if( clen==1 ) { //vector -> vector
+                               System.arraycopy(getDenseBlockValues(), rl,
+                                       dest.getDenseBlockValues(), 0, ru-rl+1);
                        }
-                       else //matrix -> vector
-                       {
-                               //IBM JVM bug (JDK7) causes crash for certain 
cl/cu values (e.g., divide by zero for 4) 
-                               //for( int i=rl*clen+cl, ix=0; i<=ru*clen+cu; 
i+=clen, ix++ )
-                               //      dest.denseBlock[ix] = denseBlock[i];
-                               int len = clen;
-                               double[] a = getDenseBlockValues();
+                       else { //matrix -> vector
+                               DenseBlock a = getDenseBlock();
                                double[] c = dest.getDenseBlockValues();
-                               for( int i=rl*len+cl, ix=0; i<=ru*len+cu; 
i+=len, ix++ )
-                                       c[ix] = a[i];
+                               for( int i=rl; i<=ru; i++ )
+                                       c[i] = a.get(i, cl);
                        }
                }
-               else // GENERAL RANGE INDEXING
-               {
-                       int len1 = clen;
-                       int len2 = dest.clen;
-                       double[] a = getDenseBlockValues();
-                       double[] c = dest.getDenseBlockValues();
-                       for(int i = rl, ix1 = rl*len1+cl, ix2=0; i <= ru; i++, 
ix1+=len1, ix2+=len2) 
-                               System.arraycopy(a, ix1, c, ix2, len2);
+               else { // GENERAL RANGE INDEXING
+                       DenseBlock a = getDenseBlock();
+                       DenseBlock c = dest.getDenseBlock();
+                       int len = dest.clen;
+                       for(int i = rl; i <= ru; i++)
+                               System.arraycopy(a.values(i), a.pos(i)+cl, 
c.values(i-rl), c.pos(i-rl), len);
                }
                
                //compute nnz of output (not maintained due to native calls)
@@ -4723,7 +4665,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                if( !(op.binaryFn instanceof Multiply && op.aggOp.increOp.fn 
instanceof Plus) ) {
                        throw new DMLRuntimeException("Unsupported binary 
aggregate operation: ("+op.binaryFn+", "+op.aggOp+").");
                }
-                       
+               
                //setup meta data (dimensions, sparsity)
                int rl = m1.rlen;
                int cl = m2.clen;

Reply via email to