Repository: systemml
Updated Branches:
  refs/heads/master 94d0a1283 -> 7987caa6b


[SYSTEMML-2046] Large dense blocks in various reorg operations

This patch adds support for large dense blocks in reverse, diag,
removeEmtpy, reshape, rexpand, sort, and generic reorg operations. The
the missing reorg operation is transpose because it requires detailed
micro benchmarks due to cache-conscious operations. Furthermore, this
also includes minor cleanups of obsolete matrix block abstractions.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0a262dc3
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0a262dc3
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0a262dc3

Branch: refs/heads/master
Commit: 0a262dc3d31863db0668d05b9b058698b4b301bd
Parents: 94d0a12
Author: Matthias Boehm <[email protected]>
Authored: Sat Jan 6 22:16:28 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sun Jan 7 15:04:38 2018 -0800

----------------------------------------------------------------------
 .../compress/utils/LinearAlgebraUtils.java      |  11 +-
 .../parfor/util/StagingFileUtils.java           |  10 +-
 .../apache/sysml/runtime/io/ReaderTextCSV.java  |  11 +-
 .../sysml/runtime/io/ReaderTextCSVParallel.java |  11 +-
 .../apache/sysml/runtime/io/ReaderTextCell.java |  12 +-
 .../runtime/io/ReaderTextCellParallel.java      |  15 +-
 .../runtime/matrix/data/LibMatrixDatagen.java   |  24 +-
 .../runtime/matrix/data/LibMatrixReorg.java     | 374 ++++++++++---------
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  50 ++-
 9 files changed, 258 insertions(+), 260 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java 
b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
index 981d944..c6be7f7 100644
--- 
a/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
+++ 
b/src/main/java/org/apache/sysml/runtime/compress/utils/LinearAlgebraUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.runtime.compress.utils;
 
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
@@ -146,12 +147,10 @@ public class LinearAlgebraUtils
        
        public static void copyNonZerosToUpperTriangle( MatrixBlock ret, 
MatrixBlock tmp, int ix ) {
                double[] a = tmp.getDenseBlockValues();
-               for(int i=0; i<tmp.getNumColumns(); i++) {
-                       if( a[i] != 0 ) {
-                               ret.setValueDenseUnsafe(
-                                       (ix<i)?ix:i, (ix<i)?i:ix, a[i]);
-                       }
-               }
+               DenseBlock c = ret.getDenseBlock();
+               for(int i=0; i<tmp.getNumColumns(); i++)
+                       if( a[i] != 0 )
+                               c.set((ix<i)?ix:i, (ix<i)?i:ix, a[i]);
        }
        
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/util/StagingFileUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/util/StagingFileUtils.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/util/StagingFileUtils.java
index 87ad16c..afe0d50 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/util/StagingFileUtils.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/util/StagingFileUtils.java
@@ -32,6 +32,7 @@ import java.util.LinkedList;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.FastStringTokenizer;
 
@@ -208,22 +209,21 @@ public class StagingFileUtils
                        }
                        else
                        {
-                               while( (value=in.readLine())!=null )
-                               {
+                               DenseBlock a = tmp.getDenseBlock();
+                               while( (value=in.readLine())!=null ) {
                                        st.reset( value ); //reset tokenizer
                                        int row = st.nextInt();
                                        int col = st.nextInt();
                                        double lvalue = st.nextDouble();
-                                       tmp.setValueDenseUnsafe(row, col, 
lvalue);
+                                       a.set(row, col, lvalue);
                                }
-                               
                                tmp.recomputeNonZeros();
                        }
                }
                finally {
                        IOUtilFunctions.closeSilently(in);
                }
-                       
+               
                //finally change internal representation if required
                tmp.examSparsity();
                

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSV.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSV.java 
b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSV.java
index 9ad97e2..c0f4d99 100644
--- a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSV.java
+++ b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSV.java
@@ -38,6 +38,7 @@ import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.CSVReblockMR;
 import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
@@ -182,15 +183,13 @@ public class ReaderTextCSV extends MatrixReader
                        } 
                        else //DENSE<-value
                        {
-                               while( (value=br.readLine())!=null ) //foreach 
line
-                               {
+                               DenseBlock a = dest.getDenseBlock();
+                               while( (value=br.readLine())!=null ) { 
//foreach line
                                        String cellStr = 
value.toString().trim();
                                        emptyValuesFound = false;
                                        String[] parts = 
IOUtilFunctions.split(cellStr, delim);
                                        int col = 0;
-                                       
-                                       for( String part : parts ) //foreach 
cell
-                                       {
+                                       for( String part : parts ) { //foreach 
cell
                                                part = part.trim();
                                                if ( part.isEmpty() ) {
                                                        emptyValuesFound = true;
@@ -200,7 +199,7 @@ public class ReaderTextCSV extends MatrixReader
                                                        cellValue = 
UtilFunctions.parseToDouble(part);
                                                }
                                                if ( cellValue != 0 ) {
-                                                       
dest.setValueDenseUnsafe(row, col, cellValue);
+                                                       a.set(row, col, 
cellValue);
                                                        lnnz++;
                                                }
                                                col++;

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSVParallel.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSVParallel.java 
b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSVParallel.java
index f145d0a..6c0b463 100644
--- a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSVParallel.java
+++ b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCSVParallel.java
@@ -42,6 +42,7 @@ import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
 /**
@@ -421,14 +422,12 @@ public class ReaderTextCSVParallel extends MatrixReader
                                        } 
                                        else // DENSE<-value
                                        {
-                                               while (reader.next(key, value)) 
// foreach line
-                                               {
+                                               DenseBlock a = 
_dest.getDenseBlock();
+                                               while (reader.next(key, value)) 
{ // foreach line
                                                        String cellStr = 
value.toString().trim();
                                                        String[] parts = 
IOUtilFunctions.split(cellStr, _delim);
                                                        col = 0;
-
-                                                       for (String part : 
parts) // foreach cell
-                                                       {
+                                                       for (String part : 
parts) { // foreach cell
                                                                part = 
part.trim();
                                                                if 
(part.isEmpty()) {
                                                                        
noFillEmpty |= !_fill;
@@ -438,7 +437,7 @@ public class ReaderTextCSVParallel extends MatrixReader
                                                                        
cellValue = IOUtilFunctions.parseDoubleParallel(part);
                                                                }
                                                                if( cellValue 
!= 0 ) {
-                                                                       
_dest.setValueDenseUnsafe(row, col, cellValue);
+                                                                       
a.set(row, col, cellValue);
                                                                        lnnz++;
                                                                }
                                                                col++;

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/io/ReaderTextCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCell.java 
b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCell.java
index 7c51637..dd5ae51 100644
--- a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCell.java
+++ b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCell.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.mapred.TextInputFormat;
 
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.InputInfo;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.FastStringTokenizer;
@@ -135,12 +136,13 @@ public class ReaderTextCell extends MatrixReader
                                        } 
                                        else //DENSE<-value
                                        {
+                                               DenseBlock a = 
dest.getDenseBlock();
                                                while( reader.next(key, value) 
) {
                                                        st.reset( 
value.toString() ); //reinit tokenizer
                                                        row = st.nextInt()-1;
                                                        col = st.nextInt()-1;
                                                        double lvalue = 
st.nextDouble();
-                                                       
dest.setValueDenseUnsafe( row, col, lvalue );
+                                                       a.set( row, col, lvalue 
);
                                                }
                                        }
                                }
@@ -220,13 +222,13 @@ public class ReaderTextCell extends MatrixReader
                        } 
                        else //DENSE<-value
                        {
-                               while( (value=br.readLine())!=null )
-                               {
+                               DenseBlock a = dest.getDenseBlock();
+                               while( (value=br.readLine())!=null ) {
                                        st.reset( value ); //reinit tokenizer
                                        row = st.nextInt()-1;
-                                       col = st.nextInt()-1;   
+                                       col = st.nextInt()-1;
                                        double lvalue = st.nextDouble();
-                                       dest.setValueDenseUnsafe( row, col, 
lvalue );
+                                       a.set( row, col, lvalue );
                                }
                        }
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/io/ReaderTextCellParallel.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCellParallel.java 
b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCellParallel.java
index 6f4fa25..b692cb1 100644
--- a/src/main/java/org/apache/sysml/runtime/io/ReaderTextCellParallel.java
+++ b/src/main/java/org/apache/sysml/runtime/io/ReaderTextCellParallel.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.InputInfo;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.FastStringTokenizer;
@@ -198,10 +199,9 @@ public class ReaderTextCellParallel extends MatrixReader
                        RecordReader<LongWritable,Text> reader = 
_informat.getRecordReader(_split, _job, Reporter.NULL);
                        
                        try
-                       {                       
-                               
+                       {
                                // Read the header lines, if reading from a 
matrixMarket file
-                               if ( _matrixMarket ) {                          
        
+                               if ( _matrixMarket ) {
                                        // skip until end-of-comments (%% or %)
                                        boolean foundComment = false;
                                        while( reader.next(key, value) && 
value.toString().charAt(0) == '%'  ) {
@@ -215,7 +215,7 @@ public class ReaderTextCellParallel extends MatrixReader
                                                row = st.nextInt()-1;
                                                col = st.nextInt()-1;
                                                double lvalue = 
st.nextDoubleForParallel();
-                                               synchronized( _dest ){ //sparse 
requires lock   
+                                               synchronized( _dest ){ //sparse 
requires lock
                                                        _dest.appendValue(row, 
col, lvalue);
                                                        lnnz++;
                                                }
@@ -249,21 +249,22 @@ public class ReaderTextCellParallel extends MatrixReader
                                } 
                                else //DENSE<-value
                                {
+                                       DenseBlock a = _dest.getDenseBlock();
                                        while( reader.next(key, value) ) {
                                                st.reset( value.toString() ); 
//reinit tokenizer
                                                row = st.nextInt()-1;
                                                col = st.nextInt()-1;
                                                double lvalue = 
st.nextDoubleForParallel();
-                                               _dest.setValueDenseUnsafe( row, 
col, lvalue );
+                                               a.set( row, col, lvalue );
                                                lnnz += (lvalue!=0) ? 1 : 0;
                                        }
                                }
                        }
-                       catch(Exception ex)     {
+                       catch(Exception ex) {
                                //post-mortem error handling and bounds checking
                                if( row < 0 || row + 1 > _rlen || col < 0 || 
col + 1 > _clen )
                                        throw new RuntimeException("Matrix cell 
["+(row+1)+","+(col+1)+"] " +
-                                                         "out of overall 
matrix range [1:"+_rlen+",1:"+_clen+"]. ", ex);
+                                               "out of overall matrix range 
[1:"+_rlen+",1:"+_clen+"]. ", ex);
                                else
                                        throw new RuntimeException("Unable to 
read matrix in text cell format. ", ex);
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
index 611eed3..9dabf2b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
@@ -428,41 +428,37 @@ public class LibMatrixDatagen
                //set meta data and allocate dense block
                out.reset(size, 1, false);
                out.allocateDenseBlock();
+               DenseBlock a = out.getDenseBlock();
                seed = (seed == -1 ? System.nanoTime() : seed);
                
                if ( !replace ) 
                {
                        // reservoir sampling
-                       
                        for(int i=1; i <= size; i++) 
-                               out.setValueDenseUnsafe(i-1, 0, i );
+                               a.set(i-1, 0, i );
                        
                        Random rand = new Random(seed);
-                       for(int i=size+1; i <= range; i++) 
-                       {
+                       for(int i=size+1; i <= range; i++) {
                                if(rand.nextInt(i) < size)
-                                       out.setValueDenseUnsafe( 
rand.nextInt(size), 0, i );
+                                       a.set( rand.nextInt(size), 0, i );
                        }
                        
                        // randomize the sample (Algorithm P from Knuth's ACP)
                        // -- needed especially when the differnce between 
range and size is small)
                        double tmp;
                        int idx;
-                       for(int i=size-1; i >= 1; i--) 
-                       {
+                       for(int i=size-1; i >= 1; i--) {
                                idx = rand.nextInt(i);
                                // swap i^th and idx^th entries
-                               tmp = out.getValueDenseUnsafe(idx, 0);
-                               out.setValueDenseUnsafe(idx, 0, 
out.getValueDenseUnsafe(i, 0));
-                               out.setValueDenseUnsafe(i, 0, tmp);
+                               tmp = a.get(idx, 0);
+                               a.set(idx, 0, a.get(i, 0));
+                               a.set(i, 0, tmp);
                        }
-       
                }
-               else 
-               {
+               else {
                        Random r = new Random(seed);
                        for(int i=0; i < size; i++) 
-                               out.setValueDenseUnsafe(i, 0, 1+nextLong(r, 
range) );
+                               a.set(i, 0, 1+nextLong(r, range) );
                }
                
                out.recomputeNonZeros();

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index beebd6b..6463b98 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -101,9 +101,8 @@ public class LibMatrixReorg
        {
                ReorgType type = getReorgType(op);
                
-               switch( type )
-               {
-                       case TRANSPOSE: 
+               switch( type ) {
+                       case TRANSPOSE:
                                if( op.getNumThreads() > 1 )
                                        return transpose(in, out, 
op.getNumThreads());
                                else
@@ -111,11 +110,10 @@ public class LibMatrixReorg
                        case REV:
                                return rev(in, out);
                        case DIAG:
-                               return diag(in, out); 
+                               return diag(in, out);
                        case SORT:
                                SortIndex ix = (SortIndex) op.fn;
                                return sort(in, out, ix.getCols(), 
ix.getDecreasing(), ix.getIndexReturn());
-                       
                        default:
                                throw new DMLRuntimeException("Unsupported 
reorg operator: "+op.fn);
                }
@@ -341,7 +339,7 @@ public class LibMatrixReorg
                        
                        if( !sparse && clen == 1 ) { //DENSE COLUMN VECTOR
                                //in-place quicksort, unstable (no indexes 
needed)
-                               out.copy( in ); //dense
+                               out.copy( in ); //dense (always single block)
                                Arrays.sort(out.getDenseBlockValues());
                                if( desc )
                                        sortReverseDense(out);
@@ -351,7 +349,7 @@ public class LibMatrixReorg
                else //SORT INDEX
                {
                        if( in.isEmptyBlock(false) ) { //EMPTY INPUT BLOCK
-                               out.allocateDenseBlock(false);
+                               out.allocateDenseBlock(false); //single block
                                double[] c = out.getDenseBlockValues();
                                for( int i=0; i<rlen; i++ )
                                        c[i] = i+1; //seq(1,n)
@@ -391,8 +389,10 @@ public class LibMatrixReorg
                        //copy input data in sorted order into result
                        if( !sparse ) { //DENSE
                                out.allocateDenseBlock(false);
+                               DenseBlock a = in.getDenseBlock();
+                               DenseBlock c = out.getDenseBlock();
                                for( int i=0; i<rlen; i++ )
-                                       
System.arraycopy(in.getDenseBlockValues(), vix[i]*clen, 
out.getDenseBlockValues(), i*clen, clen);
+                                       System.arraycopy(a.values(vix[i]), 
a.pos(vix[i]), c.values(i), c.pos(i), clen);
                        }
                        else { //SPARSE
                                out.allocateSparseRowsBlock(false);
@@ -405,8 +405,9 @@ public class LibMatrixReorg
                else {
                        //copy sorted index vector into result
                        out.allocateDenseBlock(false);
+                       DenseBlock c = out.getDenseBlock();
                        for( int i=0; i<rlen; i++ )
-                               out.setValueDenseUnsafe(i, 0, vix[i]+1);
+                               c.set(i, 0, vix[i]+1);
                }
                
                return out;
@@ -439,7 +440,7 @@ public class LibMatrixReorg
                        if( SHALLOW_COPY_REORG )
                                out.copyShallow(in);
                        else
-                               out.copy(in); 
+                               out.copy(in);
                        return out;
                }
        
@@ -981,33 +982,35 @@ public class LibMatrixReorg
                return cnt;
        }
 
-       private static void reverseDense(MatrixBlock in, MatrixBlock out) 
+       private static void reverseDense(MatrixBlock in, MatrixBlock out)
                throws DMLRuntimeException
        {
                final int m = in.rlen;
                final int n = in.clen;
-               final int len = m * n;
                
                //set basic meta data and allocate output
                out.sparse = false;
                out.nonZeros = in.nonZeros;
                out.allocateDenseBlock(false);
                
-               double[] a = in.getDenseBlockValues();
-               double[] c = out.getDenseBlockValues();
-               
                //copy all rows into target positions
                if( n == 1 ) { //column vector
+                       double[] a = in.getDenseBlockValues();
+                       double[] c = out.getDenseBlockValues();
                        for( int i=0; i<m; i++ )
                                c[m-1-i] = a[i];
                }
                else { //general matrix case
-                       for( int i=0, aix=0; i<m; i++, aix+=n )
-                               System.arraycopy(a, aix, c, len-aix-n, n);
+                       DenseBlock a = in.getDenseBlock();
+                       DenseBlock c = out.getDenseBlock();
+                       for( int i=0; i<m; i++ ) {
+                               final int ri = m - 1 - i;
+                               System.arraycopy(a.values(i), a.pos(i), 
c.values(ri), c.pos(ri), n);
+                       }
                }
        }
 
-       private static void reverseSparse(MatrixBlock in, MatrixBlock out) 
+       private static void reverseSparse(MatrixBlock in, MatrixBlock out)
                throws DMLRuntimeException
        {
                final int m = in.rlen;
@@ -1018,15 +1021,12 @@ public class LibMatrixReorg
                
                out.allocateSparseRowsBlock(false);
                
+               //copy all rows into target positions
                SparseBlock a = in.getSparseBlock();
                SparseBlock c = out.getSparseBlock();
-               
-               //copy all rows into target positions
-               for( int i=0; i<m; i++ ) {
-                       if( !a.isEmpty(i) ) {
-                               c.set(m-1-i, a.get(i), true);   
-                       }
-               }
+               for( int i=0; i<m; i++ )
+                       if( !a.isEmpty(i) )
+                               c.set(m-1-i, a.get(i), true);
        }
        
        /**
@@ -1092,14 +1092,17 @@ public class LibMatrixReorg
         */
        private static void diagM2V( MatrixBlock in, MatrixBlock out )
        {
+               DenseBlock c = out.allocateBlock().getDenseBlock();
                int rlen = in.rlen;
-               
-               for( int i=0; i<rlen; i++ )
-               {
+               int nnz = 0;
+               for( int i=0; i<rlen; i++ ) {
                        double val = in.quickGetValue(i, i);
-                       if( val != 0 )
-                               out.quickSetValue(i, 0, val);
+                       if( val != 0 ) {
+                               c.set(i, 0, val);
+                               nnz++;
+                       }
                }
+               out.setNonZeros(nnz);
        }
 
        private static void reshapeDense( MatrixBlock in, MatrixBlock out, int 
rows, int cols, boolean rowwise ) 
@@ -1113,7 +1116,7 @@ public class LibMatrixReorg
                        return;
                
                //shallow dense by-row reshape (w/o result allocation)
-               if( SHALLOW_COPY_REORG && rowwise ) {
+               if( SHALLOW_COPY_REORG && rowwise && 
in.denseBlock.numBlocks()==1 ) {
                        //since the physical representation of dense matrices 
is always the same,
                        //we don't need to create a copy, given our copy on 
write semantics.
                        //however, note that with update in-place this would be 
an invalid optimization
@@ -1125,42 +1128,45 @@ public class LibMatrixReorg
                out.allocateDenseBlock(false);
                
                //dense reshape
-               double[] a = in.getDenseBlockValues();
-               double[] c = out.getDenseBlockValues();
+               DenseBlock a = in.getDenseBlock();
+               DenseBlock c = out.getDenseBlock();
                
-               if( rowwise )
-               {
+               if( rowwise ) {
                        //VECTOR-MATRIX, MATRIX-VECTOR, GENERAL CASE
                        //pure copy of rowwise internal representation
-                       System.arraycopy(a, 0, c, 0, c.length);
-               }       
-               else //colwise
-               {
-                       if( rlen==1 || clen==1 ) //VECTOR->MATRIX
-                       {
+                       c.set(a);
+               }
+               else { //colwise
+                       if( rlen==1 || clen==1 ) { //VECTOR->MATRIX
                                //note: cache-friendly on a but not on c
+                               double[] avals = a.valuesAt(0);
+                               double[] cvals = c.valuesAt(0);
                                for( int j=0, aix=0; j<cols; j++ )
                                        for( int i=0, cix=0; i<rows; i++, 
cix+=cols )
-                                               c[ cix + j ] = a[ aix++ ];
+                                               cvals[ cix + j ] = avals[ aix++ 
];
                        }
-                       else if( rows==1 || cols==1 ) //MATRIX->VECTOR  
+                       else if( rows==1 || cols==1 ) //MATRIX->VECTOR
                        {
                                //note: cache-friendly on c but not on a
+                               double[] avals = a.valuesAt(0);
+                               double[] cvals = c.valuesAt(0);
                                for( int j=0, cix=0; j<clen; j++ )
                                        for( int i=0, aix=0; i<rlen; i++, 
aix+=clen )
-                                               c[ cix++ ] = a[ aix + j ];
+                                               cvals[ cix++ ] = avals[ aix + j 
];
                        }
                        else //GENERAL CASE: MATRIX->MATRIX
                        {
                                //note: cache-friendly on c but not an a
-                               for( int i=0, cix=0; i<rows; i++ )
-                                       for( int j=0, aix2=i; j<cols; j++, 
aix2+=rows )
-                                       {
+                               for( int i=0; i<rows; i++ ) {
+                                       double[] cvals = c.values(i);
+                                       int cix = c.pos(i);
+                                       for( int j=0, aix2=i; j<cols; j++, 
aix2+=rows ) {
                                                int ai = aix2%rlen;
                                                int aj = aix2/rlen;
-                                               c[ cix++ ] = a[ ai*clen+aj ];
-                                       }                       
-                               //index conversion c[i,j]<- a[k,l]: 
+                                               cvals[cix+j] = a.get(ai,aj);
+                                       }
+                               }
+                               //index conversion c[i,j]<- a[k,l]:
                                // k = (rows*j+i)%rlen
                                // l = (rows*j+i)/rlen
                        }
@@ -1194,16 +1200,14 @@ public class LibMatrixReorg
                        {
                                //note: cache-friendly on a and c; append-only
                                c.allocate(0, estnnz, cols);
-                               for( int i=0, cix=0; i<rlen; i++, cix+=clen ) 
-                               {
-                                       if( !a.isEmpty(i) ) {
-                                               int apos = a.pos(i);
-                                               int alen = a.size(i);
-                                               int[] aix = a.indexes(i);
-                                               double[] avals = a.values(i);   
-                                               for( int j=apos; j<apos+alen; 
j++ )
-                                                       c.append(0, cix+aix[j], 
avals[j]);
-                                       }
+                               for( int i=0, cix=0; i<rlen; i++, cix+=clen ) {
+                                       if( a.isEmpty(i) ) continue;
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);
+                                       for( int j=apos; j<apos+alen; j++ )
+                                               c.append(0, cix+aix[j], 
avals[j]);
                                }
                        }
                        else if( cols%clen==0 ) { //SPECIAL N:1 MATRIX->MATRIX
@@ -1231,22 +1235,19 @@ public class LibMatrixReorg
                                //note: cache-friendly on a but not c; 
append-only
                                //long cix because total cells in sparse can be 
larger than int
                                long cix = 0;
-                               
-                               for( int i=0; i<rlen; i++ ) 
-                               {
+                               for( int i=0; i<rlen; i++ ) {
                                        if( !a.isEmpty(i) ){
                                                int apos = a.pos(i);
                                                int alen = a.size(i);
                                                int[] aix = a.indexes(i);
-                                               double[] avals = a.values(i);   
-                                               for( int j=apos; j<apos+alen; 
j++ )
-                                               {
+                                               double[] avals = a.values(i);
+                                               for( int j=apos; j<apos+alen; 
j++ ) {
                                                        int ci = 
(int)((cix+aix[j])/cols);
                                                        int cj = 
(int)((cix+aix[j])%cols);
                                                        c.allocate(ci, estnnz, 
cols);
                                                        c.append(ci, cj, 
avals[j]);
                                                }
-                                       }       
+                                       }
                                        
                                        cix += clen;
                                }
@@ -1265,35 +1266,31 @@ public class LibMatrixReorg
                                        int alen = a.size(0); //always pos 0
                                        int[] aix = a.indexes(0);
                                        double[] avals = a.values(0);
-                                       for( int j=0; j<alen; j++ ) 
-                                       {
+                                       for( int j=0; j<alen; j++ ) {
                                                int ci = aix[j]%rows;
-                                               int cj = aix[j]/rows;       
+                                               int cj = aix[j]/rows;
                                                c.allocate(ci, estnnz, cols);
                                                c.append(ci, cj, avals[j]);
                                        }
-                               }                                               
                
+                               }
                        }
                        else //GENERAL CASE: MATRIX->MATRIX
                        {
                                //note: cache-friendly on a but not c; 
append&sort, in-place w/o shifts
-                               for( int i=0; i<rlen; i++ ) 
-                               {
-                                       if( !a.isEmpty(i) ){
-                                               int apos = a.pos(i);
-                                               int alen = a.size(i);
-                                               int[] aix = a.indexes(i);
-                                               double[] avals = a.values(i);   
-                                               for( int j=apos; j<apos+alen; 
j++ )
-                                               {
-                                                       //long tmpix because 
total cells in sparse can be larger than int
-                                                       long tmpix = 
(long)aix[j]*rlen+i;
-                                                       int ci = 
(int)(tmpix%rows);
-                                                       int cj = 
(int)(tmpix/rows); 
-                                                       c.allocate(ci, estnnz, 
cols);
-                                                       c.append(ci, cj, 
avals[j]);
-                                               }
-                                       }       
+                               for( int i=0; i<rlen; i++ ) {
+                                       if( a.isEmpty(i) ) continue;
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);
+                                       for( int j=apos; j<apos+alen; j++ ) {
+                                               //long tmpix because total 
cells in sparse can be larger than int
+                                               long tmpix = 
(long)aix[j]*rlen+i;
+                                               int ci = (int)(tmpix%rows);
+                                               int cj = (int)(tmpix/rows); 
+                                               c.allocate(ci, estnnz, cols);
+                                               c.append(ci, cj, avals[j]);
+                                       }
                                }
                                out.sortSparseRows();
                        }
@@ -1314,26 +1311,30 @@ public class LibMatrixReorg
                int estnnz = (int) (in.nonZeros/rows);
                
                //sparse reshape
-               double[] a = in.getDenseBlockValues();
+               DenseBlock a = in.getDenseBlock();
                SparseBlock c = out.sparseBlock;
                
-               if( rowwise )
-               {
+               if( rowwise ) {
                        //NOTES on special cases
                        // * vector-matrix, matrix-vector not really different 
from general
                        
                        //GENERAL CASE: MATRIX->MATRIX
                        //note: cache-friendly on a and c; append-only
-                       for( int i=0, aix=0; i<rows; i++ ) 
-                               for( int j=0; j<cols; j++ )
-                               {
-                                       double val = a[aix++];
+                       for( int i=0; i<rlen; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<clen; j++ ) {
+                                       double val = avals[aix+j];
                                        if( val != 0 ) {
-                                               c.allocate(i, estnnz, cols);
-                                               c.append(i, j, val);
+                                               long cix = (long) i*clen+j;
+                                               int ci = (int)(cix / cols);
+                                               int cj = (int)(cix % cols);
+                                               c.allocate(ci, estnnz, cols);
+                                               c.append(ci, cj, val);
                                        }
                                }
-               }       
+                       }
+               }
                else //colwise
                {
                        //NOTES on special cases
@@ -1342,10 +1343,10 @@ public class LibMatrixReorg
                        if( rlen==1 ) //VECTOR->MATRIX
                        {
                                //note: cache-friendly on a but not c; 
append-only
+                               double[] avals = a.valuesAt(0);
                                for( int j=0, aix=0; j<cols; j++ )
-                                       for( int i=0; i<rows; i++ ) 
-                                       {
-                                               double val = a[aix++];
+                                       for( int i=0; i<rows; i++ ) {
+                                               double val = avals[aix++];
                                                if( val != 0 ) {
                                                        c.allocate(i, estnnz, 
cols);
                                                        c.append(i, j, val);
@@ -1356,16 +1357,15 @@ public class LibMatrixReorg
                        {
                                //note: cache-friendly on c but not a; 
append-only
                                for( int i=0; i<rows; i++ )
-                                       for( int j=0, aix2=i; j<cols; j++, 
aix2+=rows )
-                                       {
+                                       for( int j=0, aix2=i; j<cols; j++, 
aix2+=rows ) {
                                                int ai = aix2%rlen;
                                                int aj = aix2/rlen;
-                                               double val = a[ ai*clen+aj ];
+                                               double val = a.get(ai, aj);
                                                if( val != 0 ) {
                                                        c.allocate(i, estnnz, 
cols);
                                                        c.append(i, j, val);
                                                }
-                                       }                       
+                                       }
                        }
                }
        }
@@ -1385,7 +1385,7 @@ public class LibMatrixReorg
                
                //sparse/dense reshape
                SparseBlock a = in.sparseBlock;
-               double[] c = out.getDenseBlockValues();
+               DenseBlock c = out.getDenseBlock();
                
                if( rowwise )
                {
@@ -1394,18 +1394,20 @@ public class LibMatrixReorg
                        
                        //GENERAL CASE: MATRIX->MATRIX
                        //note: cache-friendly on a and c
-                       for( int i=0, cix=0; i<rlen; i++, cix+=clen ) 
-                       {
+                       for( int i=0, cix=0; i<rlen; i++, cix+=clen ) {
                                if( !a.isEmpty(i) ) {
                                        int apos = a.pos(i);
                                        int alen = a.size(i);
                                        int[] aix = a.indexes(i);
-                                       double[] avals = a.values(i);   
-                                       for( int j=apos; j<apos+alen; j++ )
-                                               c[cix+aix[j]] = avals[j];
-                               }       
+                                       double[] avals = a.values(i);
+                                       for( int j=apos; j<apos+alen; j++ ) {
+                                               int ci = (cix+aix[j]) / cols;
+                                               int cj = (cix+aix[j]) % cols;
+                                               c.set(ci, cj, avals[j]);
+                                       }
+                               }
                        }
-               }       
+               }
                else //colwise
                {
                        //NOTES on special cases
@@ -1414,35 +1416,34 @@ public class LibMatrixReorg
                        if( rlen==1 ) //VECTOR->MATRIX
                        {
                                //note: cache-friendly on a but not c
-                               if( !a.isEmpty(0) ){
+                               double[] cvals = c.valuesAt(0);
+                               if( !a.isEmpty(0) ) {
                                        int apos = a.pos(0);
                                        int alen = a.size(0);
                                        int[] aix = a.indexes(0);
-                                       double[] avals = a.values(0);   
+                                       double[] avals = a.values(0);
                                        for( int j=apos; j<apos+alen; j++ ) {
                                                int ci = aix[j]%rows;
-                                               int cj = aix[j]/rows;       
-                                               c[ci*cols+cj] = avals[j];
+                                               int cj = aix[j]/rows;
+                                               cvals[ci*cols+cj] = avals[j];
                                        }
-                               }                                               
                
+                               }
                        }
                        else //GENERAL CASE: MATRIX->MATRIX
                        {
                                //note: cache-friendly on a but not c
-                               for( int i=0; i<rlen; i++ ) 
-                               {
-                                       if( !a.isEmpty(i) ){
-                                               int apos = a.pos(i);
-                                               int alen = a.size(i);
-                                               int[] aix = a.indexes(i);
-                                               double[] avals = a.values(i);   
-                                               for( int j=apos; j<apos+alen; 
j++ ) {
-                                                       int tmpix = 
aix[j]*rlen+i;
-                                                       int ci = tmpix%rows;
-                                                       int cj = tmpix/rows;   
-                                                       c[ci*cols+cj] = 
avals[j];
-                                               }
-                                       }       
+                               for( int i=0; i<rlen; i++ ) {
+                                       if( a.isEmpty(i) ) continue;
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);
+                                       for( int j=apos; j<apos+alen; j++ ) {
+                                               int tmpix = aix[j]*rlen+i;
+                                               int ci = tmpix%rows;
+                                               int cj = tmpix/rows;
+                                               c.set(ci, cj, avals[j]);
+                                       }
                                }
                        }
                }
@@ -1674,10 +1675,10 @@ public class LibMatrixReorg
 
        private static MatrixBlock removeEmptyRows(MatrixBlock in, MatrixBlock 
ret, MatrixBlock select) 
                throws DMLRuntimeException 
-       {       
+       {
                final int m = in.rlen;
                final int n = in.clen;
-               boolean[] flags = null; 
+               boolean[] flags = null;
                int rlen2 = 0; 
                
                //Step 0: special case handling
@@ -1715,15 +1716,18 @@ public class LibMatrixReorg
                                        rlen2 += (flags[i] = !a.isEmpty(i)) ? 1 
: 0;
                        }
                        else { //DENSE
-                               double[] a = in.getDenseBlockValues();
-                               for(int i=0, aix=0; i<m; i++, aix+=n)
+                               DenseBlock a = in.getDenseBlock();
+                               for( int i=0; i<m; i++ ) {
+                                       double[] avals = a.values(i);
+                                       int aix = a.pos(i);
                                        for(int j=0; j<n; j++)
-                                               if( a[aix+j] != 0 ) {
+                                               if( avals[aix+j] != 0 ) {
                                                        flags[i] = true;
                                                        rlen2++;
                                                        //early abort for 
current row
                                                        break; 
                                                }
+                               }
                        }
                } 
                else {
@@ -1759,25 +1763,26 @@ public class LibMatrixReorg
                else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
                {
                        ret.allocateDenseBlock();
-                       double[] a = in.getDenseBlockValues();
-                       double[] c = ret.getDenseBlockValues();
-                       
-                       for( int i=0, aix=0, cix=0; i<m; i++, aix+=n )
+                       DenseBlock a = in.getDenseBlock();
+                       DenseBlock c = ret.getDenseBlock();
+                       for( int i=0, ci=0; i<m; i++ )
                                if( flags[i] ) {
-                                       System.arraycopy(a, aix, c, cix, n);
-                                       cix += n; //target index
+                                       System.arraycopy(a.values(i),
+                                               a.pos(i), c.values(ci), 
c.pos(ci), n);
+                                       ci++; //target row index
                                }
                }
                else //SPARSE <- DENSE
                {
                        ret.allocateSparseRowsBlock();
-                       double[] a = in.getDenseBlockValues();
-                       
-                       for( int i=0, aix=0, cix=0; i<m; i++, aix+=n )
+                       DenseBlock a = in.getDenseBlock();
+                       for( int i=0, ci=0; i<m; i++ )
                                if( flags[i] ) {
+                                       double[] avals = a.values(i);
+                                       int aix = a.pos(i);
                                        for( int j=0; j<n; j++ )
-                                               ret.appendValue(cix, j, 
a[aix+j]);
-                                       cix++;
+                                               ret.appendValue(ci, j, 
avals[aix+j]);
+                                       ci++;
                                }
                }
                
@@ -1785,7 +1790,7 @@ public class LibMatrixReorg
                ret.nonZeros = (select==null) ?
                        in.nonZeros : ret.recomputeNonZeros();
                ret.examSparsity();
-
+               
                return ret;
        }
 
@@ -1802,10 +1807,8 @@ public class LibMatrixReorg
                if (select == null) 
                {
                        flags = new boolean[ n ]; //false
-                       if( in.sparse ) //SPARSE 
-                       {
+                       if( in.sparse ) { //SPARSE 
                                SparseBlock a = in.sparseBlock;
-                               
                                for( int i=0; i<m; i++ ) 
                                        if ( !a.isEmpty(i) ) {
                                                int apos = a.pos(i);
@@ -1815,19 +1818,19 @@ public class LibMatrixReorg
                                                        flags[ aix[j] ] = true;
                                        }
                        }
-                       else //DENSE
-                       {
-                               double[] a = in.getDenseBlockValues();
-                               for(int i=0, aix=0; i<m; i++)
-                                       for(int j=0; j<n; j++, aix++)
-                                               if( a[aix] != 0 )
-                                                       flags[j] = true;
+                       else { //DENSE
+                               DenseBlock a = in.getDenseBlock();
+                               for( int i=0; i<m; i++ ) {
+                                       double[] avals = a.values(i);
+                                       int aix = a.pos(i);
+                                       for( int j=0; j<n; j++ )
+                                               flags[j] |= (avals[aix+j] != 0);
+                               }
                        }
                } 
                else {
                        flags = DataConverter.convertToBooleanVector(select);
                }
-
                
                //Step 2: determine number of columns
                int clen2 = 0;
@@ -1866,7 +1869,6 @@ public class LibMatrixReorg
                        {
                                //note: output dense or sparse
                                SparseBlock a = in.sparseBlock;
-                               
                                for( int i=0; i<m; i++ )
                                        if ( !a.isEmpty(i) ) {
                                                int apos = a.pos(i);
@@ -1878,26 +1880,32 @@ public class LibMatrixReorg
                                                                
ret.appendValue(i, cix[aix[j]], avals[j]);
                                        }
                        }
-                       else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
-                       {
+                       else if( !in.sparse && !ret.sparse ) { //DENSE <- DENSE
                                ret.allocateDenseBlock();
-                               double[] a = in.getDenseBlockValues();
-                               double[] c = ret.getDenseBlockValues();
-                               
-                               for(int i=0, aix=0, lcix=0; i<m; i++, 
lcix+=clen2)
-                                       for(int j=0; j<n; j++, aix++)
+                               DenseBlock a = in.getDenseBlock();
+                               DenseBlock c = ret.getDenseBlock();
+                               for( int i=0; i<m; i++ ) {
+                                       double[] avals = a.values(i);
+                                       double[] cvals = c.values(i);
+                                       int aix = a.pos(i);
+                                       int lcix = c.pos(i);
+                                       for( int j=0; j<n; j++ )
                                                if( flags[j] )
-                                                        c[ lcix+cix[j] ] = 
a[aix];     
+                                                        cvals[ lcix+cix[j] ] = 
avals[aix+j];
+                               }
                        }
-                       else //SPARSE <- DENSE
-                       {
+                       else { //SPARSE <- DENSE
                                ret.allocateSparseRowsBlock();
-                               double[] a = in.getDenseBlockValues();
-                               
-                               for(int i=0, aix=0; i<m; i++)
-                                       for(int j=0; j<n; j++, aix++)
-                                               if( flags[j] && a[aix]!=0 )
-                                                        ret.appendValue(i, 
cix[j], a[aix]);
+                               DenseBlock a = in.getDenseBlock();
+                               for( int i=0; i<m; i++ ) {
+                                       double[] avals = a.values(i);
+                                       int aix = a.pos(i);
+                                       for( int j=0; j<n; j++ ) {
+                                               double aval = avals[aix+j];
+                                               if( flags[j] && aval!=0 )
+                                                        ret.appendValue(i, 
cix[j], aval);
+                                       }
+                               }
                        }
                }
                
@@ -1926,7 +1934,7 @@ public class LibMatrixReorg
                double[] tmp = new double[Math.min(blksize,clen)];
                
                //expand input vertically  (input vector likely dense 
-               //but generic implementation for general case)          
+               //but generic implementation for general case)
                for( int i=0; i<clen; i+=blksize )
                {
                        //create sorted block indexes (append buffer)
@@ -2014,6 +2022,8 @@ public class LibMatrixReorg
                
                //expand input horizontally (input vector likely dense 
                //but generic implementation for general case)
+               DenseBlock cd = ret.getDenseBlock();
+               SparseBlock cs = ret.getSparseBlock();
                for( int i=rl; i<ru; i++ )
                {
                        //get value and cast if necessary (table)
@@ -2031,12 +2041,12 @@ public class LibMatrixReorg
                                if( cix != null ) {
                                        cix[i] = (int)(val-1);
                                }
-                               else if( ret.isInSparseFormat() ) {
-                                       ret.sparseBlock.allocate(i, 1);
-                                       ret.sparseBlock.append(i, (int)(val-1), 
1);
+                               else if( ret.sparse ) {
+                                       cs.allocate(i, 1);
+                                       cs.append(i, (int)(val-1), 1);
                                }
                                else
-                                       ret.setValueDenseUnsafe(i, 
(int)(val-1), 1);
+                                       cd.set(i, (int)(val-1), 1);
                                lnnz ++;
                        }
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/0a262dc3/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 4087e90..c3575ed 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -625,19 +625,6 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return denseBlock.get(r, c);
        }
 
-       /**
-        * This can be only called when you know you have properly allocated 
spaces for a dense representation
-        * and r and c are in the the range of the dimension
-        * Note: this function won't keep track of the nozeros
-        * 
-        * @param r row
-        * @param c column
-        * @param v value
-        */
-       public void setValueDenseUnsafe(int r, int c, double v) {
-               denseBlock.set(r, c, v);
-       }
-       
        public double getValueSparseUnsafe(int r, int c) {
                if(sparseBlock==null || sparseBlock.isEmpty(r))
                        return 0;
@@ -3247,10 +3234,12 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                
                MatrixBlock result = checkType(ret);
 
-               //compute output dimensions and sparsity flag
+               //compute output dimensions and sparsity, note that for diagM2V,
+               //the input nnz might be much larger than the nnz of the output
                CellIndex tempCellIndex = new CellIndex(-1,-1);
                op.fn.computeDimension( rlen, clen, tempCellIndex );
-               boolean sps = evalSparseFormatInMemory(tempCellIndex.row, 
tempCellIndex.column, nonZeros);
+               long ennz = Math.min(nonZeros, 
(long)tempCellIndex.row*tempCellIndex.column);
+               boolean sps = evalSparseFormatInMemory(tempCellIndex.row, 
tempCellIndex.column, ennz);
 
                //prepare output matrix block w/ right meta data
                if( result == null )
@@ -3284,28 +3273,31 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                }
                        }
                        else if( !sparse && denseBlock != null ) {
-                               if( result.isInSparseFormat() ) //SPARSE<-DENSE
-                               {
-                                       double[] a = getDenseBlockValues();
-                                       for( int i=0, aix=0; i<rlen; i++ )
-                                               for( int j=0; j<clen; j++, 
aix++ ) {
+                               if( result.isInSparseFormat() ) { 
//SPARSE<-DENSE
+                                       DenseBlock a = getDenseBlock();
+                                       for( int i=0; i<rlen; i++ ) {
+                                               double[] avals = a.values(i);
+                                               int aix = a.pos(i);
+                                               for( int j=0; j<clen; j++ ) {
                                                        temp.set(i, j);
                                                        op.fn.execute(temp, 
temp);
-                                                       
result.appendValue(temp.row, temp.column, a[aix]);      
+                                                       
result.appendValue(temp.row, temp.column, avals[aix+j]);
                                                }
+                                       }
                                }
-                               else //DENSE<-DENSE
-                               {
+                               else { //DENSE<-DENSE
                                        result.allocateDenseBlock();
-                                       double[] a = getDenseBlockValues();
-                                       double[] c = 
result.getDenseBlockValues();
-                                       int n = result.clen;
-                                       for( int i=0, aix=0; i<rlen; i++ )
-                                               for( int j=0; j<clen; j++, 
aix++ ) {
+                                       DenseBlock a = getDenseBlock();
+                                       DenseBlock c = result.getDenseBlock();
+                                       for( int i=0; i<rlen; i++ ) {
+                                               double[] avals = a.values(i);
+                                               int aix = a.pos(i);
+                                               for( int j=0; j<clen; j++ ) {
                                                        temp.set(i, j);
                                                        op.fn.execute(temp, 
temp);
-                                                       
c[temp.row*n+temp.column] = a[aix];     
+                                                       c.set(temp.row, 
temp.column, avals[aix+j]);
                                                }
+                                       }
                                        result.nonZeros = nonZeros;
                                }
                        }

Reply via email to