[4/5] incubator-systemml git commit: [SYSTEMML-382] Initial runtime integration sparse matrix blocks

mboehm7 Wed, 20 Jan 2016 14:59:46 -0800

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index f617f40..01dd8c0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
 import org.apache.commons.math3.util.FastMath;
-
 import org.apache.sysml.lops.MapMultChain.ChainType;
 import org.apache.sysml.lops.WeightedCrossEntropy.WCeMMType;
 import org.apache.sysml.lops.WeightedDivMM.WDivMMType;
@@ -1170,16 +1169,14 @@ public class LibMatrixMult
                        final int blocksizeK = 32; 
                        //note: in contrast to dense-dense, no blocking over j 
(would require maintaining blocksizeK indexes, counter-productive on skew)
                        
-                       SparseRow[] b = m2.sparseBlock;
+                       SparseBlock b = m2.sparseBlock;
                        
                        if( pm2 && m==1 )          //VECTOR-MATRIX
                        {
                                //parallelization over rows in rhs matrix
                                for( int k=rl; k<ru; k++ )
-                                       if( a[k] != 0 && b[k] != null && 
!b[k].isEmpty() ) {
-                                               int[] bix = 
b[k].getIndexContainer();
-                                               double[] bvals = 
b[k].getValueContainer();                                                       
       
-                                               vectMultiplyAdd(a[k], bvals, c, 
bix, 0, b[k].size());
+                                       if( a[k] != 0 && !b.isEmpty(k) ) {      
                                                        
+                                               vectMultiplyAdd(a[k], 
b.values(k), c, b.indexes(k), b.pos(k), 0, b.size(k));
                                        }
                        }
                        else                       //MATRIX-MATRIX
@@ -1199,12 +1196,8 @@ public class LibMatrixMult
                                                for( int k = 0; k < bklen; k++ )
                                                        {
                                                                double val = 
a[aixi+k];
-                                                               SparseRow brow 
= b[ bk+k ];
-                                                               if( val != 0 && 
brow != null && !brow.isEmpty() ) {
-                                                                       int 
blen = brow.size();
-                                                                       int[] 
bix = brow.getIndexContainer();
-                                                                       
double[] bvals = brow.getValueContainer();                                      
                        
-                                                                       
vectMultiplyAdd(val, bvals, c, bix, cixj, blen);
+                                                               if( val != 0 && 
!b.isEmpty(bk+k) ) {
+                                                                       
vectMultiplyAdd(val, b.values(bk+k), c, b.indexes(bk+k), b.pos(bk+k), cixj, 
b.size(bk+k));
                                                                }
                                                        }
                                        }
@@ -1213,19 +1206,20 @@ public class LibMatrixMult
                }
                else
                {
+                       SparseBlock b = m2.sparseBlock;
                        for( int i=rl, aix=rl*cd, cix=rl*n; i < ru; i++, cix+=n 
) 
                                for(int k = 0; k < cd; k++, aix++ ) 
                                {
                                        double val = a[aix];
                                        if( val!=0 )
                                        {
-                                               SparseRow brow = 
m2.sparseBlock[ k ];
-                                               if( brow != null && 
!brow.isEmpty() ) 
+                                               if( !b.isEmpty(k) ) 
                                                {
-                                                       int blen = brow.size();
-                                                       int[] bix = 
brow.getIndexContainer();
-                                                       double[] bvals = 
brow.getValueContainer();      
-                                                       for(int j = 0; j < 
blen; j++)
+                                                       int bpos = b.pos(k);
+                                                       int blen = b.size(k);
+                                                       int[] bix = 
b.indexes(k);
+                                                       double[] bvals = 
b.values(k);   
+                                                       for(int j = bpos; j < 
bpos+blen; j++)
                                                                c[cix+bix[j]] 
+= val * bvals[j];                                                              
  
                                                }
                                        }
@@ -1251,43 +1245,32 @@ public class LibMatrixMult
 
                if( LOW_LEVEL_OPTIMIZATION )
                {
+                       SparseBlock a = m1.sparseBlock;
+                       
                        if( m==1 && n==1 )         //DOT PRODUCT
                        {
-                               SparseRow arow = m1.sparseBlock[0];
-                               if( arow != null && !arow.isEmpty() )
-                               {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();
-                                       
-                                       c[0] = dotProduct(avals, b, aix, 0, 
alen);
+                               if( !a.isEmpty(0) ) {
+                                       c[0] = dotProduct(a.values(0), b, 
a.indexes(0), a.pos(0), 0, a.size(0));
                                }
                        }
                        else if( n==1 )            //MATRIX-VECTOR
                        {
-                               for( int i=rl; i<Math.min(ru, 
m1.sparseBlock.length); i++ )
+                               for( int i=rl; i<Math.min(ru, a.numRows()); i++ 
)
                                {
-                                       SparseRow arow = m1.sparseBlock[i];
-                                       if( arow != null && !arow.isEmpty() ) 
-                                       {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();                                      
-                                       
-                                               c[i] = dotProduct(avals, b, 
aix, 0, alen);                                                      
+                                       if( !a.isEmpty(i) ) {
+                                               c[i] = dotProduct(a.values(i), 
b, a.indexes(i), a.pos(i), 0, a.size(i));                                       
                 
                                        }
                                }
                        }
                        else if( pm2 && m==1 )     //VECTOR-MATRIX
                        {
                                //parallelization over rows in rhs matrix
-                               SparseRow arow = m1.sparseBlock[0];
-                               if( arow != null && !arow.isEmpty() ) 
+                               if( !a.isEmpty(0) ) 
                                {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();                                      
-                                       int rlix = (rl==0) ? 0 : 
arow.searchIndexesFirstGTE(rl);
+                                       int alen = a.size(0);
+                                       int[] aix = a.indexes(0);
+                                       double[] avals = a.values(0);           
                        
+                                       int rlix = (rl==0) ? 0 : 
a.posFIndexGTE(0,rl);
                                        rlix = (rlix>=0) ? rlix : alen;
                                        
                                        for( int k=rlix; k<alen && aix[k]<ru; 
k++ ) {
@@ -1300,18 +1283,18 @@ public class LibMatrixMult
                        }
                        else if( pm2 && m<=16 )    //MATRIX-MATRIX (short lhs) 
                        {
-                               SparseRow[] a = m1.sparseBlock;
-                               for( int i=0, cix=0; i<a.length; i++, cix+=n )
-                                       if( a[i] != null && !a[i].isEmpty() ) 
+                               for( int i=0, cix=0; i<a.numRows(); i++, cix+=n 
)
+                                       if( !a.isEmpty(i) ) 
                                        {
-                                               int alen = a[i].size();
-                                               int[] aix = 
a[i].getIndexContainer();
-                                               double[] avals = 
a[i].getValueContainer();                                      
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
                                
                                                
-                                               int k1 = (rl==0) ? 0 : 
a[i].searchIndexesFirstGTE(rl);
-                                               k1 = (k1>=0) ? k1 : alen;
-                                               int k2 = (ru==cd) ? alen : 
a[i].searchIndexesFirstGTE(ru);
-                                               k2 = (k2>=0) ? k2 : alen;
+                                               int k1 = (rl==0) ? apos : 
a.posFIndexGTE(i, rl);
+                                               k1 = (k1>=0) ? k1 : apos+alen;
+                                               int k2 = (ru==cd) ? apos+alen : 
a.posFIndexGTE(i, ru);
+                                               k2 = (k2>=0) ? k2 : apos+alen;
                                                
                                                //rest not aligned to blocks of 
4 rows
                                        final int bn = (k2-k1) % 4;
@@ -1330,14 +1313,14 @@ public class LibMatrixMult
                        }
                        else                       //MATRIX-MATRIX
                        {
-                               for( int i=rl, cix=rl*n; i<Math.min(ru, 
m1.sparseBlock.length); i++, cix+=n )
+                               for( int i=rl, cix=rl*n; i<Math.min(ru, 
a.numRows()); i++, cix+=n )
                                {
-                                       SparseRow arow = m1.sparseBlock[i];
-                                       if( arow != null && !arow.isEmpty() ) 
+                                       if( !a.isEmpty(i) ) 
                                        {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();                                      
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
                                
                                                
                                                if( alen==1 && avals[0]==1 ) 
//ROW SELECTION 
                                                {
@@ -1349,13 +1332,13 @@ public class LibMatrixMult
                                                        //rest not aligned to 
blocks of 4 rows
                                                final int bn = alen % 4;
                                                switch( bn ){
-                                                       case 1: 
vectMultiplyAdd(avals[0], b, c, aix[0]*n, cix, n); break;
-                                               case 2: 
vectMultiplyAdd2(avals[0],avals[1], b, c, aix[0]*n, aix[1]*n, cix, n); break;
-                                                       case 3: 
vectMultiplyAdd3(avals[0],avals[1],avals[2], b, c, aix[0]*n, aix[1]*n, 
aix[2]*n, cix, n); break;
+                                                       case 1: 
vectMultiplyAdd(avals[apos], b, c, aix[apos]*n, cix, n); break;
+                                               case 2: 
vectMultiplyAdd2(avals[apos],avals[apos+1], b, c, aix[apos]*n, aix[apos+1]*n, 
cix, n); break;
+                                                       case 3: 
vectMultiplyAdd3(avals[apos],avals[apos+1],avals[apos+2], b, c, aix[apos]*n, 
aix[apos+1]*n, aix[apos+2]*n, cix, n); break;
                                                }
                                                
                                                //compute blocks of 4 rows 
(core inner loop)
-                                               for( int k = bn; k<alen; k+=4 ) 
{
+                                               for( int k = apos+bn; 
k<apos+alen; k+=4 ) {
                                                        vectMultiplyAdd4( 
avals[k], avals[k+1], avals[k+2], avals[k+3], b, c, 
                                                                                
  aix[k]*n, aix[k+1]*n, aix[k+2]*n, aix[k+3]*n, cix, n );
                                                }
@@ -1366,17 +1349,17 @@ public class LibMatrixMult
                }
                else
                {
-                       for( int i=rl, cix=rl*n; i<Math.min(ru, 
m1.sparseBlock.length); i++, cix+=n )
+                       SparseBlock a = m1.sparseBlock;
+                       for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); 
i++, cix+=n )
                        {
-                               SparseRow arow = m1.sparseBlock[i];
-                               if( arow != null && !arow.isEmpty() ) 
+                               if( !a.isEmpty(i) ) 
                                {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();                                      
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);           
                        
                                        
-                                       for(int k = 0; k < alen; k++) 
-                                       {
+                                       for(int k = apos; k < apos+alen; k++) {
                                                double val = avals[k];
                                                for(int j = 0, bix=aix[k]*n; j 
< n; j++)
                                                        c[cix+j] += val * 
b[bix+j];                                                             
@@ -1396,58 +1379,60 @@ public class LibMatrixMult
        private static void matrixMultSparseSparse(MatrixBlock m1, MatrixBlock 
m2, MatrixBlock ret, boolean pm2, int rl, int ru) 
                throws DMLRuntimeException
        {       
-               SparseRow[] b = m2.sparseBlock;
+               SparseBlock a = m1.sparseBlock;
+               SparseBlock b = m2.sparseBlock;
                double[] c = ret.denseBlock;
                int m = m1.rlen;
                int n = m2.clen;
                
+               //TODO perf sparse block
+               
                // MATRIX-MATRIX (VV, MV not applicable here because V always 
dense)
                if(LOW_LEVEL_OPTIMIZATION)
                {
                        if( pm2 && m==1 )          //VECTOR-MATRIX
                        {
                                //parallelization over rows in rhs matrix
-                               SparseRow arow = m1.sparseBlock[0];
-                               if( arow != null && !arow.isEmpty() ) 
+                               if( !a.isEmpty(0) ) 
                                {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();                                      
-                                       int rlix = (rl==0) ? 0 : 
arow.searchIndexesFirstGTE(rl);
+                                       int alen = a.size(0);
+                                       int[] aix = a.indexes(0);
+                                       double[] avals = a.values(0);           
                        
+                                       int rlix = (rl==0) ? 0 : 
a.posFIndexGTE(0,rl);
                                        rlix = (rlix>=0) ? rlix : alen;
                                        
                                        for( int k=rlix; k<alen && aix[k]<ru; 
k++ )
-                                               if( b[aix[k]] != null && 
!b[aix[k]].isEmpty() ) {
-                                                       SparseRow brow = 
b[aix[k]];             
-                                                       int blen = brow.size();
-                                                       int[] bix = 
brow.getIndexContainer();
-                                                       double[] bvals = 
brow.getValueContainer();                                                       
       
-                                                       
vectMultiplyAdd(avals[k], bvals, c, bix, 0, blen);
+                                               if( !b.isEmpty(aix[k]) ) {
+                                                       int bpos = 
b.pos(aix[k]);
+                                                       int blen = 
b.size(aix[k]);
+                                                       int[] bix = 
b.indexes(aix[k]);
+                                                       double[] bvals = 
b.values(aix[k]);                                                              
+                                                       
vectMultiplyAdd(avals[k], bvals, c, bix, bpos, 0, blen);
                                                }                       
                                }
                        }       
                        else                       //MATRIX-MATRIX
                        {
-                               for( int i=rl, cix=rl*n; i<Math.min(ru, 
m1.sparseBlock.length); i++, cix+=n )
+                               for( int i=rl, cix=rl*n; i<Math.min(ru, 
a.numRows()); i++, cix+=n )
                                {
-                                       SparseRow arow = m1.sparseBlock[i];
-                                       if( arow != null && !arow.isEmpty() ) 
+                                       if( !a.isEmpty(i) ) 
                                        {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();                                      
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
                                
                                                
-                                               for(int k = 0; k < alen; k++) 
+                                               for(int k = apos; k < 
apos+alen; k++) 
                                                {
                                                        double val = avals[k];
-                                                       SparseRow brow = b[ 
aix[k] ];
-                                                       if( brow != null && 
!brow.isEmpty() ) 
+                                                       if( !b.isEmpty(aix[k]) 
) 
                                                        {
-                                                               int blen = 
brow.size();
-                                                               int[] bix = 
brow.getIndexContainer();
-                                                               double[] bvals 
= brow.getValueContainer();      
+                                                               int bpos = 
b.pos(aix[k]);
+                                                               int blen = 
b.size(aix[k]);
+                                                               int[] bix = 
b.indexes(aix[k]);
+                                                               double[] bvals 
= b.values(aix[k]);      
                                                                
-                                                               
vectMultiplyAdd(val, bvals, c, bix, cix, blen);
+                                                               
vectMultiplyAdd(val, bvals, c, bix, bpos, cix, blen);
                                                        }
                                                }                               
                
                                        }
@@ -1456,25 +1441,25 @@ public class LibMatrixMult
                }
                else
                {
-                       for( int i=rl, cix=rl*n; i<Math.min(ru, 
m1.sparseBlock.length); i++, cix+=n )
+                       for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); 
i++, cix+=n )
                        {
-                               SparseRow arow = m1.sparseBlock[i];
-                               if( arow != null && !arow.isEmpty() ) 
+                               if( !a.isEmpty(i) ) 
                                {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();                                      
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);           
                        
                                        
-                                       for(int k = 0; k < alen; k++) 
+                                       for(int k = apos; k < apos+alen; k++) 
                                        {
                                                double val = avals[k];
-                                               SparseRow brow = 
m2.sparseBlock[ aix[k] ];
-                                               if( brow != null && 
!brow.isEmpty() ) 
+                                               if( !b.isEmpty(aix[k]) ) 
                                                {
-                                                       int blen = brow.size();
-                                                       int[] bix = 
brow.getIndexContainer();
-                                                       double[] bvals = 
brow.getValueContainer();      
-                                                       for(int j = 0; j < 
blen; j++)
+                                                       int bpos = 
b.pos(aix[k]);
+                                                       int blen = 
b.size(aix[k]);
+                                                       int[] bix = 
b.indexes(aix[k]);
+                                                       double[] bvals = 
b.values(aix[k]);      
+                                                       for(int j = bpos; j < 
bpos+blen; j++)
                                                                c[cix+bix[j]] 
+= val * bvals[j];                                                              
  
                                                }
                                        }                                       
        
@@ -1499,6 +1484,8 @@ public class LibMatrixMult
        private static void matrixMultUltraSparse(MatrixBlock m1, MatrixBlock 
m2, MatrixBlock ret, int rl, int ru) 
                throws DMLRuntimeException 
        {
+               //TODO perf sparse block, consider iterators
+               
                boolean leftUS = m1.isUltraSparse();
                final int m  = m1.rlen;
                final int cd = m1.clen;
@@ -1506,26 +1493,27 @@ public class LibMatrixMult
                
                if( leftUS ) //left is ultra-sparse (IKJ)
                {
+                       SparseBlock a = m1.sparseBlock;
                        boolean rightSparse = m2.sparse;
                        
                        for( int i=rl; i<ru; i++ )
                        {
-                               SparseRow arow = m1.sparseBlock[ i ];
-                               if( arow != null && !arow.isEmpty() ) 
+                               if( !a.isEmpty(i) ) 
                                {
-                                       int alen = arow.size();
-                                       int[] aixs = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();      
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aixs = a.indexes(i);
+                                       double[] avals = a.values(i);
                                        
-                                       if( alen==1 && avals[0]==1 ) //ROW 
SELECTION (no aggregation)
+                                       if( alen==1 && avals[apos]==1 ) //ROW 
SELECTION (no aggregation)
                                        {
-                                               int aix = aixs[0];
+                                               int aix = aixs[apos];
                                                if( rightSparse ) { //sparse 
right matrix (full row copy)
-                                                       if( 
m2.sparseBlock!=null && m2.sparseBlock[aix]!=null ) {
+                                                       if( 
!m2.sparseBlock.isEmpty(aix) ) {
                                                                ret.rlen=m;
                                                                
ret.allocateSparseRowsBlock(false); //allocation on demand
-                                                               
ret.sparseBlock[i] = new SparseRow(m2.sparseBlock[aix]); 
-                                                               ret.nonZeros += 
ret.sparseBlock[i].size();
+                                                               
ret.sparseBlock.set(i, new SparseRow(m2.sparseBlock.get(aix))); 
+                                                               ret.nonZeros += 
ret.sparseBlock.size(i);
                                                        }
                                                }
                                                else { //dense right matrix 
(append all values)
@@ -1535,7 +1523,7 @@ public class LibMatrixMult
                                        }
                                        else //GENERAL CASE
                                        {
-                                               for( int k=0; k<alen; k++ )
+                                               for( int k=apos; k<apos+alen; 
k++ )
                                                {
                                                        double aval = avals[k];
                                                        int aix = aixs[k];
@@ -1553,15 +1541,17 @@ public class LibMatrixMult
                }
                else //right is ultra-sparse (KJI)
                {
+                       SparseBlock b = m2.sparseBlock;
+                       
                        for(int k = 0; k < cd; k++ ) 
                        {                       
-                               SparseRow brow = m2.sparseBlock[ k ];
-                               if( brow != null && !brow.isEmpty() ) 
+                               if( !b.isEmpty(k) ) 
                                {
-                                       int blen = brow.size();
-                                       int[] bixs = brow.getIndexContainer();
-                                       double[] bvals = 
brow.getValueContainer();                                                       
       
-                                       for( int j=0; j<blen; j++ )
+                                       int bpos = b.pos(k);
+                                       int blen = b.size(k);
+                                       int[] bixs = b.indexes(k);
+                                       double[] bvals = b.values(k);           
                                                
+                                       for( int j=bpos; j<bpos+blen; j++ )
                                        {
                                                double bval = bvals[j];
                                                int bix = bixs[j];
@@ -1647,7 +1637,7 @@ public class LibMatrixMult
         */
        private static void matrixMultChainSparse(MatrixBlock mX, MatrixBlock 
mV, MatrixBlock mW, MatrixBlock ret, ChainType ct, int rl, int ru) 
        {
-               SparseRow[] a = mX.sparseBlock;
+               SparseBlock a = mX.sparseBlock;
                double[] b = mV.denseBlock;
                double[] w = (mW!=null) ? mW.denseBlock : null;
                double[] c = ret.denseBlock;
@@ -1667,12 +1657,10 @@ public class LibMatrixMult
 
                        //compute 1st matrix-vector for row block
                        for( int j=0; j < tmplen; j++) {
-                               SparseRow arow = a[bi+j];
-                               if( arow != null && !arow.isEmpty() ) {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();                                      
-                                       tmp[j] = dotProduct(avals, b, aix, 0, 
alen);                                                    
+                               if( !a.isEmpty(bi+j) ) {
+                                       int apos = a.pos(bi+j);
+                                       int alen = a.size(bi+j);                
                
+                                       tmp[j] = dotProduct(a.values(bi+j), b, 
a.indexes(bi+j), apos, 0, alen);                                                
 
                                }
                        }
                        
@@ -1684,12 +1672,12 @@ public class LibMatrixMult
                
                        //compute 2nd matrix vector for row block and aggregate
                        for( int j=0; j < tmplen; j++) {
-                               SparseRow arow = a[bi+j];
-                               if( arow != null && !arow.isEmpty() && tmp[j] 
!= 0 ) {
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();              
-                                       vectMultiplyAdd(tmp[j], avals, c, aix, 
0, alen);                                                        
+                               if( !a.isEmpty(bi+j) && tmp[j] != 0 ) {
+                                       int apos = a.pos(bi+j);
+                                       int alen = a.size(bi+j);
+                                       int[] aix = a.indexes(bi+j);
+                                       double[] avals = a.values(bi+j);        
        
+                                       vectMultiplyAdd(tmp[j], avals, c, aix, 
apos, 0, alen);                                                  
                                }
                        }
                }
@@ -1848,6 +1836,7 @@ public class LibMatrixMult
        {
                //2) transpose self matrix multiply sparse
                // (compute only upper-triangular matrix due to symmetry)       
        
+               SparseBlock a = m1.sparseBlock;
                double[] c = ret.denseBlock;
                int m = m1.rlen;
                int n = m1.clen;
@@ -1858,16 +1847,17 @@ public class LibMatrixMult
                        //algorithm: scan rows, foreach row self join (KIJ)
                        if( LOW_LEVEL_OPTIMIZATION )
                        {
-                               for( SparseRow arow : m1.sparseBlock )
-                                       if( arow != null && !arow.isEmpty() ) 
+                               for( int r=0; r<a.numRows(); r++ )
+                                       if( !a.isEmpty(r) ) 
                                        {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();                                      
-                                               int rlix = (rl==0) ? 0 : 
arow.searchIndexesFirstGTE(rl);
-                                               rlix = (rlix>=0) ? rlix : alen;
+                                               int apos = a.pos(r);
+                                               int alen = a.size(r);
+                                               int[] aix = a.indexes(r);
+                                               double[] avals = a.values(r);   
                                
+                                               int rlix = (rl==0) ? apos : 
a.posFIndexGTE(r, rl);
+                                               rlix = (rlix>=0) ? rlix : 
apos+alen;
                                                
-                                               for(int i = rlix; i < alen && 
aix[i]<ru; i++) 
+                                               for(int i = rlix; i < apos+alen 
&& aix[i]<ru; i++) 
                                                {
                                                        double val = avals[i];
                                                        if( val != 0 ) {
@@ -1879,20 +1869,21 @@ public class LibMatrixMult
                        }
                        else
                        {
-                               for( SparseRow arow : m1.sparseBlock )
-                                       if( arow != null && !arow.isEmpty() ) 
+                               for( int r=0; r<a.numRows(); r++ )
+                                       if( !a.isEmpty(r) ) 
                                        {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();                                      
-                                               int rlix = (rl==0) ? 0 : 
arow.searchIndexesFirstGTE(rl);
-                                               rlix = (rlix>=0) ? rlix : alen;
+                                               int apos = a.pos(r);
+                                               int alen = a.size(r);
+                                               int[] aix = a.indexes(r);
+                                               double[] avals = a.values(r);   
                                
+                                               int rlix = (rl==0) ? apos : 
a.posFIndexGTE(r, rl);
+                                               rlix = (rlix>=0) ? rlix : 
apos+alen;
                                                
-                                               for(int i = rlix; i < alen && 
aix[i]<ru; i++) 
+                                               for(int i = rlix; i < apos+alen 
&& aix[i]<ru; i++) 
                                                {
                                                        double val = avals[i];
                                                        if( val != 0 )
-                                                               for(int j = i, 
ix2 = aix[i]*n; j < alen; j++)
+                                                               for(int j = i, 
ix2 = aix[i]*n; j < apos+alen; j++)
                                                                        
c[ix2+aix[j]] += val * avals[j];
                                                }
                                        }
@@ -1902,11 +1893,9 @@ public class LibMatrixMult
                {
                        if( m==1 ) //VECTOR 
                        {
-                               SparseRow arow = m1.sparseBlock[0];
-                               if( arow !=null && !arow.isEmpty() )
-                               {
-                                       int alen = arow.size();
-                                       double[] avals = 
arow.getValueContainer();      
+                               if( !m1.sparseBlock.isEmpty(0) ) {
+                                       int alen = m1.sparseBlock.size(0); 
//pos always 0
+                                       double[] avals = a.values(0);   
                                        c[0] = dotProduct(avals, avals, alen);
                                }
                        }
@@ -1921,16 +1910,17 @@ public class LibMatrixMult
                                //algorithm: scan rows, foreach row self join 
(KIJ)
                                if( LOW_LEVEL_OPTIMIZATION )
                                {
-                                       for( SparseRow arow : m1.sparseBlock )
-                                               if( arow != null && 
!arow.isEmpty() ) 
+                                       for( int r=0; r<a.numRows(); r++ )
+                                               if( !a.isEmpty(r) ) 
                                                {
-                                                       int alen = arow.size();
-                                                       int[] aix = 
arow.getIndexContainer();
-                                                       double[] avals = 
arow.getValueContainer();                                      
-                                                       int rlix = (rl==0) ? 0 
: arow.searchIndexesFirstGTE(rl);
-                                                       rlix = (rlix>=0) ? rlix 
: alen;
+                                                       int apos = a.pos(r);
+                                                       int alen = a.size(r);
+                                                       int[] aix = 
a.indexes(r);
+                                                       double[] avals = 
a.values(r);                                   
+                                                       int rlix = (rl==0) ? 
apos : a.posFIndexGTE(r, rl);
+                                                       rlix = (rlix>=0) ? rlix 
: apos+alen;
                                                        
-                                                       for(int i = rlix; i < 
alen && aix[i]<ru; i++) 
+                                                       for(int i = rlix; i < 
apos+alen && aix[i]<ru; i++) 
                                                        {
                                                                double val = 
avals[i];
                                                                if( val != 0 ) {
@@ -1942,16 +1932,17 @@ public class LibMatrixMult
                                }
                                else
                                {
-                                       for( SparseRow arow : m1.sparseBlock )
-                                               if( arow != null && 
!arow.isEmpty() ) 
+                                       for( int r=0; r<a.numRows(); r++ )
+                                               if( !a.isEmpty(r) ) 
                                                {
-                                                       int alen = arow.size();
-                                                       int[] aix = 
arow.getIndexContainer();
-                                                       double[] avals = 
arow.getValueContainer();                                      
-                                                       int rlix = (rl==0) ? 0 
: arow.searchIndexesFirstGTE(rl);
-                                                       rlix = (rlix>=0) ? rlix 
: alen;
+                                                       int apos = a.pos(r);
+                                                       int alen = a.size(r);
+                                                       int[] aix = 
a.indexes(r);
+                                                       double[] avals = 
a.values(r);                                   
+                                                       int rlix = (rl==0) ? 
apos : a.posFIndexGTE(r,rl);
+                                                       rlix = (rlix>=0) ? rlix 
: apos+alen;
                                                        
-                                                       for(int i = rlix; i < 
alen && aix[i]<ru; i++) 
+                                                       for(int i = rlix; i < 
apos+alen && aix[i]<ru; i++) 
                                                        {
                                                                double val = 
avals[i];
                                                                if( val != 0 )
@@ -2023,7 +2014,7 @@ public class LibMatrixMult
        {
                double[] a = pm1.denseBlock;
                double[] b = m2.denseBlock;
-               SparseRow[] c = ret1.sparseBlock;
+               SparseBlock c = ret1.sparseBlock;
 
                final int n = m2.clen;
                final int brlen = ret1.getNumRows();
@@ -2048,9 +2039,8 @@ public class LibMatrixMult
                                }
                
                                //append entire dense row into sparse target 
position
-                               c[bpos] = new SparseRow( n );
                                for( int j=0; j<n; j++ )
-                                       c[bpos].append(j, b[bix+j]);
+                                       c.append(bpos, j, b[bix+j]);
                                lastblk = blk;
                        }
                }
@@ -2069,8 +2059,8 @@ public class LibMatrixMult
        private static void matrixMultPermuteSparse( MatrixBlock pm1, 
MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2, int rl, int ru)
        {
                double[] a = pm1.denseBlock;
-               SparseRow[] b = m2.sparseBlock;
-               SparseRow[] c = ret1.sparseBlock;
+               SparseBlock b = m2.sparseBlock;
+               SparseBlock c = ret1.sparseBlock;
 
                final int brlen = ret1.getNumRows();
                
@@ -2093,8 +2083,7 @@ public class LibMatrixMult
                                }
                
                                //memcopy entire sparse row into target position
-                               if( b[i] != null )
-                                       c[bpos] = new SparseRow( b[i] );
+                               c.set(bpos, new SparseRow( b.get(i) ));
                                lastblk = blk;
                        }
                }
@@ -2198,8 +2187,8 @@ public class LibMatrixMult
         */
        private static void matrixMultWSLossSparseDense(MatrixBlock mX, 
MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightsType 
wt, int rl, int ru)
        {
-               SparseRow[] x = mX.sparseBlock;
-               SparseRow[] w = (mW!=null)? mW.sparseBlock : null;
+               SparseBlock x = mX.sparseBlock;
+               SparseBlock w = (mW!=null)? mW.sparseBlock : null;
                double[] u = mU.denseBlock;
                double[] v = mV.denseBlock;
                final int n = mX.clen; 
@@ -2211,11 +2200,12 @@ public class LibMatrixMult
                {
                        // approach: iterate over W, point-wise in order to 
exploit sparsity
                        for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd )
-                               if( w[i] != null && !w[i].isEmpty() ) {
-                                       int wlen = w[i].size();
-                                       int[] wix = w[i].getIndexContainer();
-                                       double[] wval = 
w[i].getValueContainer();
-                                       for( int k=0; k<wlen; k++ ) {
+                               if( !w.isEmpty(i) ) {
+                                       int wpos = w.pos(i);
+                                       int wlen = w.size(i);
+                                       int[] wix = w.indexes(i);
+                                       double[] wval = w.values(i);
+                                       for( int k=wpos; k<wpos+wlen; k++ ) {
                                                double xi = mX.quickGetValue(i, 
wix[k]);
                                                double uvij = dotProduct(u, v, 
uix, wix[k]*cd, cd);
                                                wsloss += 
wval[k]*(xi-uvij)*(xi-uvij);
@@ -2227,11 +2217,12 @@ public class LibMatrixMult
                {
                        // approach: iterate over W, point-wise in order to 
exploit sparsity
                        for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd )
-                               if( x[i] != null && !x[i].isEmpty() ) {
-                                       int xlen = x[i].size();
-                                       int[] xix = x[i].getIndexContainer();
-                                       double[] xval = 
x[i].getValueContainer();
-                                       for( int k=0; k<xlen; k++ ) {
+                               if( !x.isEmpty(i) ) {
+                                       int xpos = x.pos(i);
+                                       int xlen = x.size(i);
+                                       int[] xix = x.indexes(i);
+                                       double[] xval = x.values(i);
+                                       for( int k=xpos; k<xpos+xlen; k++ ) {
                                                double uvij = dotProduct(u, v, 
uix, xix[k]*cd, cd);
                                                wsloss += 
(xval[k]-uvij)*(xval[k]-uvij);
                                        }
@@ -2259,18 +2250,19 @@ public class LibMatrixMult
                        // approach: iterate over all cells of X and 
                        for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) 
                        {
-                               if( x[i]==null || x[i].isEmpty() ) { //empty row
+                               if( x.isEmpty(i) ) { //empty row
                                        for( int j=0, vix=0; j<n; j++, vix+=cd) 
{
                                                double uvij = dotProduct(u, v, 
uix, vix, cd);
                                                wsloss += (-uvij)*(-uvij);
                                        }
                                }
                                else { //non-empty row
-                                       int xlen = x[i].size();
-                                       int[] xix = x[i].getIndexContainer();
-                                       double[] xval = 
x[i].getValueContainer();
+                                       int xpos = x.pos(i);
+                                       int xlen = x.size(i);
+                                       int[] xix = x.indexes(i);
+                                       double[] xval = x.values(i);
                                        int last = -1;
-                                       for( int k=0; k<xlen; k++ ) {
+                                       for( int k=xpos; k<xpos+xlen; k++ ) {
                                                //process last nnz til current 
nnz
                                                for( int k2=last+1; k2<xix[k]; 
k2++ ){
                                                        double uvij = 
dotProduct(u, v, uix, k2*cd, cd);
@@ -2316,14 +2308,15 @@ public class LibMatrixMult
                        // approach: iterate over W, point-wise in order to 
exploit sparsity
                        if( mW.sparse ) //SPARSE
                        {
-                               SparseRow[] wrows = mW.sparseBlock;
+                               SparseBlock w = mW.sparseBlock;
                                
                                for( int i=rl; i<ru; i++ )
-                                       if( wrows[i] != null && 
!wrows[i].isEmpty() ){
-                                               int wlen = wrows[i].size();
-                                               int[] wix = 
wrows[i].getIndexContainer();
-                                               double[] wval = 
wrows[i].getValueContainer();
-                                               for( int k=0; k<wlen; k++ ) {
+                                       if( !w.isEmpty(i) ) {
+                                               int wpos = w.pos(i);
+                                               int wlen = w.size(i);
+                                               int[] wix = w.indexes(i);
+                                               double[] wval = w.values(i);
+                                               for( int k=wpos; k<wpos+wlen; 
k++ ) {
                                                        double uvij = 
dotProductGeneric(mU, mV, i, wix[k], cd);
                                                        double xi = 
mX.quickGetValue(i, wix[k]);
                                                        wsloss += 
wval[k]*(xi-uvij)*(xi-uvij);
@@ -2349,14 +2342,15 @@ public class LibMatrixMult
                        // approach: iterate over W, point-wise in order to 
exploit sparsity
                        if( mW.sparse ) //SPARSE
                        {
-                               SparseRow[] xrows = mX.sparseBlock;
+                               SparseBlock x = mX.sparseBlock;
                                
                                for( int i=rl; i<ru; i++ )
-                                       if( xrows[i] != null && 
!xrows[i].isEmpty() ){
-                                               int xlen = xrows[i].size();
-                                               int[] xix = 
xrows[i].getIndexContainer();
-                                               double[] xval = 
xrows[i].getValueContainer();
-                                               for( int k=0; k<xlen; k++ ) {
+                                       if( !x.isEmpty(i) ) {
+                                               int xpos = x.pos(i);
+                                               int xlen = x.size(i);
+                                               int[] xix = x.indexes(i);
+                                               double[] xval = x.values(i);
+                                               for( int k=xpos; k<xpos+xlen; 
k++ ) {
                                                        double uvij = 
dotProductGeneric(mU, mV, i, xix[k], cd);
                                                        wsloss += 
(xval[k]-uvij)*(xval[k]-uvij);
                                                }
@@ -2469,11 +2463,10 @@ public class LibMatrixMult
        private static void matrixMultWSigmoidSparseDense(MatrixBlock mW, 
MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WSigmoidType wt, int rl, int 
ru) 
                throws DMLRuntimeException
        {
-               SparseRow[] w = mW.sparseBlock;
-               SparseRow[] c = ret.sparseBlock;
+               SparseBlock w = mW.sparseBlock;
+               SparseBlock c = ret.sparseBlock;
                double[] u = mU.denseBlock;
                double[] v = mV.denseBlock;
-               final int n = mW.clen; 
                final int cd = mU.clen;
                
                boolean flagminus = (wt==WSigmoidType.MINUS || 
wt==WSigmoidType.LOG_MINUS); 
@@ -2481,15 +2474,16 @@ public class LibMatrixMult
        
                //approach: iterate over non-zeros of w, selective mm 
computation
                for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd )
-                       if( w[i] != null && !w[i].isEmpty() ) {
-                               int wlen = w[i].size();
-                               int[] wix = w[i].getIndexContainer();
-                               double[] wval = w[i].getValueContainer();
-                               c[i] = new SparseRow(wlen, n);
+                       if( !w.isEmpty(i) ) {
+                               int wpos = w.pos(i);
+                               int wlen = w.size(i);
+                               int[] wix = w.indexes(i);
+                               double[] wval = w.values(i);
                                
-                               for( int k=0; k<wlen; k++ ) {
+                               c.allocate(i, wlen);
+                               for( int k=wpos; k<wpos+wlen; k++ ) {
                                        double cval = wsigmoid(wval[k], u, v, 
uix, wix[k]*cd, flagminus, flaglog, cd);
-                                       c[i].append(wix[k], cval);
+                                       c.append(i, wix[k], cval);
                                }
                        }
        }
@@ -2519,19 +2513,20 @@ public class LibMatrixMult
                if( mW.sparse ) //SPARSE
                {
                        //w and c always in same representation
-                       SparseRow[] w = mW.sparseBlock;
-                       SparseRow[] c = ret.sparseBlock;
+                       SparseBlock w = mW.sparseBlock;
+                       SparseBlock c = ret.sparseBlock;
                        
                        for( int i=rl; i<ru; i++ )
-                               if( w[i] != null && !w[i].isEmpty() ) {
-                                       int wlen = w[i].size();
-                                       int[] wix = w[i].getIndexContainer();
-                                       double[] wval = 
w[i].getValueContainer();
-                                       c[i] = new SparseRow(wlen, n);
+                               if( !w.isEmpty(i) ) {
+                                       int wpos = w.pos(i);
+                                       int wlen = w.size(i);
+                                       int[] wix = w.indexes(i);
+                                       double[] wval = w.values(i);
                                        
-                                       for( int k=0; k<wlen; k++ ) {
+                                       c.allocate(i, wlen);
+                                       for( int k=wpos; k<wpos+wlen; k++ ) {
                                                double cval = wsigmoid(wval[k], 
mU, mV, i, wix[k], flagminus, flaglog, cd);
-                                               c[i].append(wix[k], cval);
+                                               c.append(i, wix[k], cval);
                                        }
                                }       
                }
@@ -2622,26 +2617,27 @@ public class LibMatrixMult
                final boolean minus = wt.isMinus();
                final int cd = mU.clen;
                
-               SparseRow[] w = mW.sparseBlock;
+               SparseBlock w = mW.sparseBlock;
                double[] u = mU.denseBlock;
                double[] v = mV.denseBlock;
                double[] c = ret.denseBlock;
                
                //approach: iterate over non-zeros of w, selective mm 
computation
                for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) {
-                       if( w[i] != null && !w[i].isEmpty() ) {
-                               int wlen = w[i].size();
-                               int[] wix = w[i].getIndexContainer();
-                               double[] wval = w[i].getValueContainer();
+                       if( !w.isEmpty(i) ) {
+                               int wpos = w.pos(i);
+                               int wlen = w.size(i);
+                               int[] wix = w.indexes(i);
+                               double[] wval = w.values(i);
                        
                                if( basic ) {
-                                       for( int k=0; k<wlen; k++ )
+                                       for( int k=wpos; k<wpos+wlen; k++ )
                                                ret.appendValue( i, wix[k], 
wval[k] * dotProduct(u, v, uix, wix[k]*cd, cd));
                                }
                                else { //left/right minus default
-                                       int k = (cl==0) ? 0 : 
w[i].searchIndexesFirstGTE(cl);
-                                       k = (k>=0) ? k : wlen;
-                                       for( ; k<wlen && wix[k]<cu; k++ )
+                                       int k = (cl==0) ? wpos : 
w.posFIndexGTE(i,cl);
+                                       k = (k>=0) ? k : wpos+wlen;
+                                       for( ; k<wpos+wlen && wix[k]<cu; k++ )
                                                wdivmm(wval[k], u, v, c, uix, 
wix[k]*cd, left, mult, minus, cd);
                                }
                        }
@@ -2676,16 +2672,17 @@ public class LibMatrixMult
                //approach: iterate over non-zeros of w, selective mm 
computation
                if( mW.sparse ) //SPARSE
                {
-                       SparseRow[] w = mW.sparseBlock;
+                       SparseBlock w = mW.sparseBlock;
                        
                        for( int i=rl; i<ru; i++ ) {
-                               if( w[i] != null && !w[i].isEmpty() ) {
-                                       int wlen = w[i].size();
-                                       int[] wix = w[i].getIndexContainer();
-                                       double[] wval = 
w[i].getValueContainer();
-                                       int k = (cl==0) ? 0 : 
w[i].searchIndexesFirstGTE(cl);
-                                       k = (k>=0) ? k : wlen;
-                                       for( ; k<wlen && wix[k]<cu; k++ ) { 
+                               if( !w.isEmpty(i) ) {
+                                       int wpos = w.pos(i);
+                                       int wlen = w.size(i);
+                                       int[] wix = w.indexes(i);
+                                       double[] wval = w.values(i);
+                                       int k = (cl==0) ? wpos : 
w.posFIndexGTE(i,cl);
+                                       k = (k>=0) ? k : wpos+wlen;
+                                       for( ; k<wpos+wlen && wix[k]<cu; k++ ) 
{ 
                                                if( basic ) {
                                                        double uvij = 
dotProductGeneric(mU,mV, i, wix[k], cd);
                                                        ret.appendValue(i, 
wix[k], uvij);
@@ -2769,7 +2766,7 @@ public class LibMatrixMult
         */
        private static void matrixMultWCeMMSparseDense(MatrixBlock mW, 
MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WCeMMType wt, int rl, int ru)
        {
-               SparseRow[] w = mW.sparseBlock;
+               SparseBlock w = mW.sparseBlock;
                double[] u = mU.denseBlock;
                double[] v = mV.denseBlock;
                final int cd = mU.clen;
@@ -2777,11 +2774,12 @@ public class LibMatrixMult
                
                // approach: iterate over all cells of X and 
                for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) {
-                       if( w[i]!=null && !w[i].isEmpty() ) { 
-                               int wlen = w[i].size();
-                               int[] wix = w[i].getIndexContainer();
-                               double[] wval = w[i].getValueContainer();
-                               for( int k=0; k<wlen; k++ ) {
+                       if( !w.isEmpty(i) ) { 
+                               int wpos = w.pos(i);
+                               int wlen = w.size(i);
+                               int[] wix = w.indexes(i);
+                               double[] wval = w.values(i);
+                               for( int k=wpos; k<wpos+wlen; k++ ) {
                                        double uvij = dotProduct(u, v, uix, 
wix[k]*cd, cd);
                                        wceval += wval[k] * FastMath.log(uvij); 
                                
                                }
@@ -2811,14 +2809,15 @@ public class LibMatrixMult
                //approach: iterate over non-zeros of w, selective mm 
computation
                if( mW.sparse ) //SPARSE
                {
-                       SparseRow[] w = mW.sparseBlock;
+                       SparseBlock w = mW.sparseBlock;
                        
                        for( int i=rl; i<ru; i++ )
-                               if( w[i] != null && !w[i].isEmpty() ) {
-                                       int wlen = w[i].size();
-                                       int[] wix = w[i].getIndexContainer();
-                                       double[] wval = 
w[i].getValueContainer();
-                                       for( int k=0; k<wlen; k++ ) {
+                               if( !w.isEmpty(i) ) {
+                                       int wpos = w.pos(i);
+                                       int wlen = w.size(i);
+                                       int[] wix = w.indexes(i);
+                                       double[] wval = w.values(i);
+                                       for( int k=wpos; k<wpos+wlen; k++ ) {
                                                double uvij = 
dotProductGeneric(mU, mV, i, wix[k], cd);
                                                wceval += wval[k] * 
FastMath.log(uvij); 
                                        }
@@ -2905,26 +2904,26 @@ public class LibMatrixMult
        private static void matrixMultWuMMSparseDense(MatrixBlock mW, 
MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WUMMType wt, ValueFunction fn, 
int rl, int ru) 
                throws DMLRuntimeException
        {
-               SparseRow[] w = mW.sparseBlock;
-               SparseRow[] c = ret.sparseBlock;
+               SparseBlock w = mW.sparseBlock;
+               SparseBlock c = ret.sparseBlock;
                double[] u = mU.denseBlock;
                double[] v = mV.denseBlock;
-               final int n = mW.clen; 
                final int cd = mU.clen;
                
                boolean flagmult = (wt==WUMMType.MULT); 
        
                //approach: iterate over non-zeros of w, selective mm 
computation
                for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd )
-                       if( w[i] != null && !w[i].isEmpty() ) {
-                               int wlen = w[i].size();
-                               int[] wix = w[i].getIndexContainer();
-                               double[] wval = w[i].getValueContainer();
-                               c[i] = new SparseRow(wlen, n);
+                       if( !w.isEmpty(i) ) {
+                               int wpos = w.pos(i);
+                               int wlen = w.size(i);
+                               int[] wix = w.indexes(i);
+                               double[] wval = w.values(i);
                                
-                               for( int k=0; k<wlen; k++ ) {
+                               c.allocate(i, wlen);
+                               for( int k=wpos; k<wpos+wlen; k++ ) {
                                        double cval = wumm(wval[k], u, v, uix, 
wix[k]*cd, flagmult, fn, cd);
-                                       c[i].append(wix[k], cval);
+                                       c.append(i, wix[k], cval);
                                }
                        }
        }
@@ -2953,19 +2952,20 @@ public class LibMatrixMult
                if( mW.sparse ) //SPARSE
                {
                        //w and c always in same representation
-                       SparseRow[] w = mW.sparseBlock;
-                       SparseRow[] c = ret.sparseBlock;
+                       SparseBlock w = mW.sparseBlock;
+                       SparseBlock c = ret.sparseBlock;
                        
                        for( int i=rl; i<ru; i++ )
-                               if( w[i] != null && !w[i].isEmpty() ) {
-                                       int wlen = w[i].size();
-                                       int[] wix = w[i].getIndexContainer();
-                                       double[] wval = 
w[i].getValueContainer();
-                                       c[i] = new SparseRow(wlen, n);
+                               if( !w.isEmpty(i) ) {
+                                       int wpos = w.pos(i);
+                                       int wlen = w.size(i);
+                                       int[] wix = w.indexes(i);
+                                       double[] wval = w.values(i);
                                        
-                                       for( int k=0; k<wlen; k++ ) {
+                                       c.allocate(i, wlen); 
+                                       for( int k=wpos; k<wpos+wlen; k++ ) {
                                                double cval = wumm(wval[k], mU, 
mV, i, wix[k], flagmult, fn, cd);
-                                               c[i].append(wix[k], cval);
+                                               c.append(i, wix[k], cval);
                                        }
                                }       
                }
@@ -3064,17 +3064,17 @@ public class LibMatrixMult
                return val; 
        }
        
-       private static double dotProduct( double[] a, double[] b, int[] aix, 
final int bi, final int len )
+       private static double dotProduct( double[] a, double[] b, int[] aix, 
int ai, final int bi, final int len )
        {
                double val = 0;
                final int bn = len%8;
                                
                //compute rest
-               for( int i = 0; i < bn; i++ )
+               for( int i = ai; i < ai+bn; i++ )
                        val += a[ i ] * b[ bi+aix[i] ];
                
                //unrolled 8-block (for better instruction-level parallelism)
-               for( int i = bn; i < len; i+=8 )
+               for( int i = ai+bn; i < ai+len; i+=8 )
                {
                        //read 64B cacheline of a
                        //read 64B of b via 'gather'
@@ -3250,6 +3250,7 @@ public class LibMatrixMult
         * @param ci
         * @param len
         */
+       @SuppressWarnings("unused")
        private static void vectMultiplyAdd( final double aval, double[] b, 
double[] c, int[] bix, final int ci, final int len )
        {
                final int bn = len%8;


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java
index c46e2c5..38df263 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java
@@ -901,17 +901,17 @@ public class LibMatrixOuterAgg
                if( in.isEmptyBlock(false) )
                        return;
                        
-               SparseRow[] aSparseRows = in.getSparseBlock();          
-               for (int j = 0; j < aSparseRows.length; ++j)
-               if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() )
-               {
-                       double [] aValues = aSparseRows[j].getValueContainer();
-                       int [] aIndexes = aSparseRows[j].getIndexContainer();
+               SparseBlock sblock = in.getSparseBlock();               
+               for( int j = 0; j < sblock.numRows(); j++)
+               if( !sblock.isEmpty(j) ) {
+                       int apos = sblock.pos(j);
+                       int alen = sblock.size(j);
+                       int[] aix = sblock.indexes(j);
+                       double [] avals = sblock.values(j);
                        
-                       for (int i=0; i < aValues.length; ++i)
-                       {
-                               int cnt = sumRowSumGtLeColSumLtGe(aValues[i], 
bv, bOp);
-                               out.quickSetValue(0, aIndexes[i], cnt);
+                       for (int i=apos; i < apos+alen; i++) {
+                               int cnt = sumRowSumGtLeColSumLtGe(avals[i], bv, 
bOp);
+                               out.quickSetValue(0, aix[i], cnt);
                        }
                }               
        }
@@ -961,17 +961,17 @@ public class LibMatrixOuterAgg
                if( in.isEmptyBlock(false) )
                        return;
                        
-               SparseRow[] aSparseRows = in.getSparseBlock();          
-               for (int j = 0; j < aSparseRows.length; ++j)
-               if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() )
-               {
-                       double [] aValues = aSparseRows[j].getValueContainer();
-                       int [] aIndexes = aSparseRows[j].getIndexContainer();
+               SparseBlock sblock = in.getSparseBlock();               
+               for (int j = 0; j < sblock.numRows(); j++)
+               if( !sblock.isEmpty(j) ) {
+                       int apos = sblock.pos(j);
+                       int alen = sblock.size(j);
+                       int[] aix = sblock.indexes(j);
+                       double [] avals = sblock.values(j);
                        
-                       for (int i=0; i < aValues.length; ++i)
-                       {
-                               int cnt = sumRowSumLtGeColSumGtLe(aValues[i], 
bv, bOp);
-                               out.quickSetValue(0, aIndexes[i], cnt);
+                       for (int i=apos; i < apos+alen; i++) {
+                               int cnt = sumRowSumLtGeColSumGtLe(avals[i], bv, 
bOp);
+                               out.quickSetValue(0, aix[i], cnt);
                        }
                }               
        }
@@ -1021,17 +1021,17 @@ public class LibMatrixOuterAgg
                if( in.isEmptyBlock(false) )
                        return;
                        
-               SparseRow[] aSparseRows = in.getSparseBlock();          
-               for (int j = 0; j < aSparseRows.length; ++j)
-               if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() )
-               {
-                       double [] aValues = aSparseRows[j].getValueContainer();
-                       int [] aIndexes = aSparseRows[j].getIndexContainer();
+               SparseBlock sblock = in.getSparseBlock();               
+               for (int j = 0; j < sblock.numRows(); j++)
+               if( !sblock.isEmpty(j) ) {
+                       int apos = sblock.pos(j);
+                       int alen = sblock.size(j);
+                       int[] aix = sblock.indexes(j);
+                       double [] avals = sblock.values(j);
                        
-                       for (int i=0; i < aValues.length; ++i)
-                       {
-                               int cnt = sumEqNe(aValues[i], bv, bOp);
-                               out.quickSetValue(0, aIndexes[i], cnt);
+                       for (int i=apos; i < apos+alen; ++i) {
+                               int cnt = sumEqNe(avals[i], bv, bOp);
+                               out.quickSetValue(0, aix[i], cnt);
                        }
                }               
        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 4ea4d1a..bdfe7c8 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -365,8 +365,8 @@ public class LibMatrixReorg
                                out.allocateSparseRowsBlock(false);
                                for( int i=0; i<rlen; i++ ) {
                                        int ix = vix[i];
-                                       if( in.sparseBlock[ix]!=null && 
!in.sparseBlock[ix].isEmpty() ) {
-                                               out.sparseBlock[i] = new 
SparseRow(in.sparseBlock[ix]);
+                                       if( !in.sparseBlock.isEmpty(ix) ) {
+                                               out.sparseBlock.set(i, 
in.sparseBlock.get(ix));
                                        }
                                }
                        }
@@ -799,7 +799,7 @@ public class LibMatrixReorg
                out.allocateSparseRowsBlock();
                                
                double[] a = in.getDenseBlock();
-               SparseRow[] c = out.getSparseBlock();
+               SparseBlock c = out.getSparseBlock();
                
                //blocking according to typical L2 cache sizes 
                final int blocksizeI = 128;
@@ -815,9 +815,8 @@ public class LibMatrixReorg
                                for( int i=bi; i<bimin; i++ )                   
        
                                        for( int j=bj, aix=i*n+bj; j<bjmin; 
j++, aix++ )
                                        {
-                                               if( c[j] == null )
-                                                        c[j] = new 
SparseRow(ennz2,n2);
-                                               c[j].append(i, a[aix]);
+                                               c.allocate(j, ennz2, n2); 
+                                               c.append(j, i, a[aix]);
                                        }
                        }
                
@@ -841,8 +840,8 @@ public class LibMatrixReorg
                out.reset(m2, n2, true); //always sparse
                out.allocateSparseRowsBlock();
                
-               SparseRow[] a = in.getSparseBlock();
-               SparseRow[] c = out.getSparseBlock();
+               SparseBlock a = in.getSparseBlock();
+               SparseBlock c = out.getSparseBlock();
 
                //initial pass to determine capacity (this helps to prevent
                //sparse row reallocations and mem inefficiency w/ skew
@@ -850,17 +849,18 @@ public class LibMatrixReorg
                if( n <= 4096 ) { //16KB
                        cnt = new int[n];
                        for( int i=0; i<m; i++ ) {
-                               if( a[i] !=null && !a[i].isEmpty() )
-                                       countAgg(cnt, a[i].getIndexContainer(), 
a[i].size());
+                               if( !a.isEmpty(i) )
+                                       countAgg(cnt, a.indexes(i), a.pos(i), 
a.size(i));
                        }
                }
                
                //allocate output sparse rows
-               if( cnt != null ) {
-                       for( int i=0; i<m2; i++ )
-                               if( cnt[i] > 0 )
-                                       c[i] = new SparseRow(cnt[i]);
-               }
+               //TODO perf sparse block
+               //if( cnt != null ) {
+               //      for( int i=0; i<m2; i++ )
+               //              if( cnt[i] > 0 )
+               //                      c[i] = new SparseRow(cnt[i]);
+               //}
                
                //blocking according to typical L2 cache sizes 
                final int blocksizeI = 128;
@@ -881,18 +881,15 @@ public class LibMatrixReorg
                                //core transpose operation
                                for( int i=bi, iix=0; i<bimin; i++, iix++ )
                                {
-                                       SparseRow arow = a[i];
-                                       if( arow!=null && !arow.isEmpty() )
-                                       {
-                                               int alen = arow.size();
-                                               double[] avals = 
arow.getValueContainer();
-                                               int[] aix = 
arow.getIndexContainer();
+                                       if( !a.isEmpty(i) ) {
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);
                                                int j = ix[iix]; //last block 
boundary
-                                               for( ; j<alen && aix[j]<bjmin; 
j++ )
-                                               {
-                                                       if( c[aix[j]] == null )
-                                                                c[aix[j]] = 
new SparseRow(ennz2,n2);
-                                                       c[aix[j]].append(i, 
avals[j]);
+                                               for( ; j<alen && aix[j]<bjmin; 
j++ ) {
+                                                       c.allocate(aix[apos+j], 
ennz2,n2);
+                                                       c.append(aix[apos+j], 
i, avals[apos+j]);
                                                }
                                                ix[iix] = j; //keep block 
boundary
                                        }
@@ -920,15 +917,14 @@ public class LibMatrixReorg
                out.reset(m2, n2, false); //always dense
                out.allocateDenseBlock();
                
-               SparseRow[] a = in.getSparseBlock();
+               SparseBlock a = in.getSparseBlock();
                double[] c = out.getDenseBlock();
                
                if( m==1 ) //ROW VECTOR TRANSPOSE
                {
-                       SparseRow arow = a[0];
-                       int alen = arow.size();
-                       int[] aix = arow.getIndexContainer();
-                       double[] avals = arow.getValueContainer();
+                       int alen = a.size(0); //always pos 0
+                       int[] aix = a.indexes(0);
+                       double[] avals = a.values(0);
                        for( int j=0; j<alen; j++ )
                                c[ aix[j] ] = avals[j];
                }
@@ -953,15 +949,14 @@ public class LibMatrixReorg
                                        //core transpose operation
                                        for( int i=bi, iix=0; i<bimin; i++, 
iix++ )
                                        {
-                                               SparseRow arow = a[i];
-                                               if( arow!=null && 
!arow.isEmpty() )
-                                               {
-                                                       int alen = arow.size();
-                                                       double[] avals = 
arow.getValueContainer();
-                                                       int[] aix = 
arow.getIndexContainer();
+                                               if( !a.isEmpty(i) ) {
+                                                       int apos = a.pos(i);
+                                                       int alen = a.size(i);
+                                                       int[] aix = 
a.indexes(i);
+                                                       double[] avals = 
a.values(i);
                                                        int j = ix[iix]; //last 
block boundary
-                                                       for( ; j<alen && 
aix[j]<bjmin; j++ )
-                                                               c[ aix[j]*n2+i 
] = avals[ j ];
+                                                       for( ; j<alen && 
aix[apos+j]<bjmin; j++ )
+                                                               c[ 
aix[apos+j]*n2+i ] = avals[ apos+j ];
                                                        ix[iix] = j; //keep 
block boundary                                              
                                                }
                                        }
@@ -1051,13 +1046,13 @@ public class LibMatrixReorg
                
                out.allocateSparseRowsBlock(false);
                
-               SparseRow[] a = in.getSparseBlock();
-               SparseRow[] c = out.getSparseBlock();
+               SparseBlock a = in.getSparseBlock();
+               SparseBlock c = out.getSparseBlock();
                
                //copy all rows into target positions
                for( int i=0; i<m; i++ ) {
-                       if( a[i] != null && !a[i].isEmpty() ) {
-                               c[m-1-i] = new SparseRow(a[i]); 
+                       if( !a.isEmpty(i) ) {
+                               c.set(m-1-i, a.get(i)); 
                        }
                }
        }
@@ -1200,8 +1195,8 @@ public class LibMatrixReorg
                int estnnz = (int) (in.nonZeros/rows);
                
                //sparse reshape
-               SparseRow[] aRows = in.sparseBlock;
-               SparseRow[] cRows = out.sparseBlock;
+               SparseBlock a = in.sparseBlock;
+               SparseBlock c = out.sparseBlock;
                
                if( rowwise )
                {
@@ -1212,18 +1207,16 @@ public class LibMatrixReorg
                        if( rows==1 ) //MATRIX->VECTOR  
                        {
                                //note: cache-friendly on a and c; append-only
-                               if( cRows[0] == null )
-                                       cRows[0] = new SparseRow(estnnz, cols);
-                               SparseRow crow = cRows[0];
+                               c.allocate(0, estnnz, cols);
                                for( int i=0, cix=0; i<rlen; i++, cix+=clen ) 
                                {
-                                       SparseRow arow = aRows[i];
-                                       if( arow!=null && !arow.isEmpty() ) {
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();      
-                                               for( int j=0; j<alen; j++ )
-                                                       crow.append(cix+aix[j], 
avals[j]);
+                                       if( !a.isEmpty(i) ) {
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
+                                               for( int j=apos; j<apos+alen; 
j++ )
+                                                       c.append(0, cix+aix[j], 
avals[j]);
                                        }
                                }
                        }
@@ -1235,18 +1228,17 @@ public class LibMatrixReorg
                                
                                for( int i=0; i<rlen; i++ ) 
                                {
-                                       SparseRow arow = aRows[i];
-                                       if( arow!=null && !arow.isEmpty() ){
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();      
-                                               for( int j=0; j<alen; j++ )
+                                       if( !a.isEmpty(i) ){
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
+                                               for( int j=apos; j<apos+alen; 
j++ )
                                                {
                                                        int ci = 
(int)((cix+aix[j])/cols);
                                                        int cj = 
(int)((cix+aix[j])%cols);       
-                                                       if( cRows[ci] == null )
-                                                               cRows[ci] = new 
SparseRow(estnnz, cols);
-                                                       cRows[ci].append(cj, 
avals[j]);
+                                                       c.allocate(ci, estnnz, 
cols);
+                                                       c.append(ci, cj, 
avals[j]);
                                                }
                                        }       
                                        
@@ -1263,18 +1255,16 @@ public class LibMatrixReorg
                        if( rlen==1 ) //VECTOR->MATRIX
                        {
                                //note: cache-friendly on a but not c; 
append-only
-                               SparseRow arow = aRows[0];
-                               if( arow!=null && !arow.isEmpty() ){
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();
-                                       for( int j=0; j<alen; j++ )
+                               if( !a.isEmpty(0) ){
+                                       int alen = a.size(0); //always pos 0
+                                       int[] aix = a.indexes(0);
+                                       double[] avals = a.values(0);
+                                       for( int j=0; j<alen; j++ ) 
                                        {
                                                int ci = aix[j]%rows;
                                                int cj = aix[j]/rows;       
-                                               if( cRows[ci] == null )
-                                                       cRows[ci] = new 
SparseRow(estnnz, cols);
-                                               cRows[ci].append(cj, avals[j]);
+                                               c.allocate(ci, estnnz, cols);
+                                               c.append(ci, cj, avals[j]);
                                        }
                                }                                               
                
                        }
@@ -1283,20 +1273,19 @@ public class LibMatrixReorg
                                //note: cache-friendly on a but not c; 
append&sort, in-place w/o shifts
                                for( int i=0; i<rlen; i++ ) 
                                {
-                                       SparseRow arow = aRows[i];
-                                       if( arow!=null && !arow.isEmpty() ){
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();      
-                                               for( int j=0; j<alen; j++ )
+                                       if( !a.isEmpty(i) ){
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
+                                               for( int j=apos; j<apos+alen; 
j++ )
                                                {
                                                        //long tmpix because 
total cells in sparse can be larger than int
                                                        long tmpix = 
(long)aix[j]*rlen+i;
                                                        int ci = 
(int)(tmpix%rows);
-                                                       int cj = 
(int)(tmpix/rows);       
-                                                       if( cRows[ci] == null )
-                                                               cRows[ci] = new 
SparseRow(estnnz, cols);
-                                                       cRows[ci].append(cj, 
avals[j]);
+                                                       int cj = 
(int)(tmpix/rows); 
+                                                       c.allocate(ci, estnnz, 
cols);
+                                                       c.append(ci, cj, 
avals[j]);
                                                }
                                        }       
                                }
@@ -1323,13 +1312,12 @@ public class LibMatrixReorg
                        return;
                
                //allocate block if necessary
-               if(out.sparseBlock==null)
-                       out.sparseBlock=new SparseRow[rows];
+               out.allocateSparseRowsBlock(false);
                int estnnz = (int) (in.nonZeros/rows);
                
                //sparse reshape
                double[] a = in.denseBlock;
-               SparseRow[] cRows = out.sparseBlock;
+               SparseBlock c = out.sparseBlock;
                
                if( rowwise )
                {
@@ -1342,10 +1330,9 @@ public class LibMatrixReorg
                                for( int j=0; j<cols; j++ )
                                {
                                        double val = a[aix++];
-                                       if( val != 0 ){
-                                               if( cRows[i] == null )
-                                                       cRows[i] = new 
SparseRow(estnnz, cols);
-                                               cRows[i].append(j, val);
+                                       if( val != 0 ) {
+                                               c.allocate(i, estnnz, cols);
+                                               c.append(i, j, val);
                                        }
                                }
                }       
@@ -1361,10 +1348,9 @@ public class LibMatrixReorg
                                        for( int i=0; i<rows; i++ ) 
                                        {
                                                double val = a[aix++];
-                                               if( val != 0 ){
-                                                       if( cRows[i] == null )
-                                                               cRows[i] = new 
SparseRow(estnnz, cols);
-                                                       cRows[i].append(j, val);
+                                               if( val != 0 ) {
+                                                       c.allocate(i, estnnz, 
cols);
+                                                       c.append(i, j, val);
                                                }
                                        }
                        }
@@ -1377,10 +1363,9 @@ public class LibMatrixReorg
                                                int ai = aix2%rlen;
                                                int aj = aix2/rlen;
                                                double val = a[ ai*clen+aj ];
-                                               if( val != 0 ){
-                                                       if( cRows[i] == null )
-                                                               cRows[i] = new 
SparseRow(estnnz, cols);
-                                                       cRows[i].append(j, val);
+                                               if( val != 0 ) {
+                                                       c.allocate(i, estnnz, 
cols);
+                                                       c.append(i, j, val);
                                                }
                                        }                       
                        }
@@ -1410,7 +1395,7 @@ public class LibMatrixReorg
                out.allocateDenseBlock(false);
                
                //sparse/dense reshape
-               SparseRow[] aRows = in.sparseBlock;
+               SparseBlock a = in.sparseBlock;
                double[] c = out.denseBlock;
                
                if( rowwise )
@@ -1422,12 +1407,12 @@ public class LibMatrixReorg
                        //note: cache-friendly on a and c
                        for( int i=0, cix=0; i<rlen; i++, cix+=clen ) 
                        {
-                               SparseRow arow = aRows[i];
-                               if( arow!=null && !arow.isEmpty() ){
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();      
-                                       for( int j=0; j<alen; j++ )
+                               if( !a.isEmpty(i) ) {
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);   
+                                       for( int j=apos; j<apos+alen; j++ )
                                                c[cix+aix[j]] = avals[j];
                                }       
                        }
@@ -1440,13 +1425,12 @@ public class LibMatrixReorg
                        if( rlen==1 ) //VECTOR->MATRIX
                        {
                                //note: cache-friendly on a but not c
-                               SparseRow arow = aRows[0];
-                               if( arow!=null && !arow.isEmpty() ){
-                                       int alen = arow.size();
-                                       int[] aix = arow.getIndexContainer();
-                                       double[] avals = 
arow.getValueContainer();      
-                                       for( int j=0; j<alen; j++ )
-                                       {
+                               if( !a.isEmpty(0) ){
+                                       int apos = a.pos(0);
+                                       int alen = a.size(0);
+                                       int[] aix = a.indexes(0);
+                                       double[] avals = a.values(0);   
+                                       for( int j=apos; j<apos+alen; j++ ) {
                                                int ci = aix[j]%rows;
                                                int cj = aix[j]/rows;       
                                                c[ci*cols+cj] = avals[j];
@@ -1458,13 +1442,12 @@ public class LibMatrixReorg
                                //note: cache-friendly on a but not c
                                for( int i=0; i<rlen; i++ ) 
                                {
-                                       SparseRow arow = aRows[i];
-                                       if( arow!=null && !arow.isEmpty() ){
-                                               int alen = arow.size();
-                                               int[] aix = 
arow.getIndexContainer();
-                                               double[] avals = 
arow.getValueContainer();      
-                                               for( int j=0; j<alen; j++ )
-                                               {
+                                       if( !a.isEmpty(i) ){
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);   
+                                               for( int j=apos; j<apos+alen; 
j++ ) {
                                                        int tmpix = 
aix[j]*rlen+i;
                                                        int ci = tmpix%rows;
                                                        int cj = tmpix/rows;   
@@ -1699,20 +1682,20 @@ public class LibMatrixReorg
                        return;
                
                int rlen = in.rlen;
-               SparseRow[] aRows = in.sparseBlock;
+               SparseBlock a = in.sparseBlock;
                
                //append all values to right blocks
                MatrixIndexes ixtmp = new MatrixIndexes();
                for( int i=0; i<rlen; i++ )
                {
-                       SparseRow arow = aRows[i];
-                       if( arow!=null && !arow.isEmpty() ) {
+                       if( !a.isEmpty(i) ) {
                                long ai = row_offset+i;
-                               int alen = arow.size();
-                               int[] aix = arow.getIndexContainer();
-                               double[] avals = arow.getValueContainer();
-                               for( int j=0; j<alen; j++ ) 
-                               {
+                               int apos = a.pos(i);
+                               int alen = a.size(i);
+                               int[] aix = a.indexes(i);
+                               double[] avals = a.values(i);
+                               
+                               for( int j=apos; j<apos+alen; j++ )  {
                                        long aj = col_offset+aix[j];
                                        computeResultBlockIndex(ixtmp, ai, aj, 
rows1, cols1, rows2, cols2, brlen2, bclen2, rowwise);
                                        MatrixBlock out = rix.get(ixtmp);
@@ -1831,10 +1814,9 @@ public class LibMatrixReorg
                        
                        if( in.sparse ) //SPARSE 
                        {
-                               SparseRow[] a = in.sparseBlock;
-                               
+                               SparseBlock a = in.sparseBlock;                 
        
                                for ( int i=0; i < m; i++ )
-                                       if ( a[i] != null && !a[i].isEmpty() ) {
+                                       if ( !a.isEmpty(i) ) {
                                                flags[i] = true;
                                                rlen2++;
                                        }
@@ -1872,7 +1854,7 @@ public class LibMatrixReorg
                        //note: output dense or sparse
                        for( int i=0, cix=0; i<m; i++ )
                                if( flags[i] )
-                                       ret.appendRow(cix++, in.sparseBlock[i]);
+                                       ret.appendRow(cix++, 
in.sparseBlock.get(i));
                }
                else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
                {
@@ -1930,13 +1912,14 @@ public class LibMatrixReorg
                        flags = new boolean[ n ]; //false
                        if( in.sparse ) //SPARSE 
                        {
-                               SparseRow[] a = in.sparseBlock;
+                               SparseBlock a = in.sparseBlock;
                                
                                for( int i=0; i<m; i++ ) 
-                                       if ( a[i] != null && !a[i].isEmpty() ) {
-                                               int alen = a[i].size();
-                                               int[] aix = 
a[i].getIndexContainer();
-                                               for( int j=0; j<alen; j++ )
+                                       if ( !a.isEmpty(i) ) {
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               for( int j=apos; j<apos+alen; 
j++ )
                                                        flags[ aix[j] ] = true;
                                        }
                        }
@@ -1978,14 +1961,15 @@ public class LibMatrixReorg
                if( in.sparse ) //* <- SPARSE 
                {
                        //note: output dense or sparse
-                       SparseRow[] a = in.sparseBlock;
+                       SparseBlock a = in.sparseBlock;
                        
                        for( int i=0; i<m; i++ ) 
-                               if ( a[i] != null && !a[i].isEmpty() ) {
-                                       int alen = a[i].size();
-                                       int[] aix = a[i].getIndexContainer();
-                                       double[] avals = 
a[i].getValueContainer();
-                                       for( int j=0; j<alen; j++ )
+                               if ( !a.isEmpty(i) ) {
+                                       int apos = a.pos(i);
+                                       int alen = a.size(i);
+                                       int[] aix = a.indexes(i);
+                                       double[] avals = a.values(i);
+                                       for( int j=apos; j<apos+alen; j++ )
                                                if( flags[aix[j]] )
                                                        ret.appendValue(i, 
cix[aix[j]], avals[j]);
                                }
@@ -2199,6 +2183,7 @@ public class LibMatrixReorg
         * @param ai
         * @param len
         */
+       @SuppressWarnings("unused")
        private static void countAgg( int[] c, int[] ai, final int len ) 
        {
                final int bn = len%8;
@@ -2221,6 +2206,28 @@ public class LibMatrixReorg
                }
        }
        
+       private static void countAgg( int[] c, int[] aix, int ai, final int len 
) 
+       {
+               final int bn = len%8;
+               
+               //compute rest, not aligned to 8-block
+               for( int i=ai; i<ai+bn; i++ )
+                       c[ aix[i] ]++;
+               
+               //unrolled 8-block (for better instruction level parallelism)
+               for( int i=ai+bn; i<ai+len; i+=8 )
+               {
+                       c[ aix[ i+0 ] ] ++;
+                       c[ aix[ i+1 ] ] ++;
+                       c[ aix[ i+2 ] ] ++;
+                       c[ aix[ i+3 ] ] ++;
+                       c[ aix[ i+4 ] ] ++;
+                       c[ aix[ i+5 ] ] ++;
+                       c[ aix[ i+6 ] ] ++;
+                       c[ aix[ i+7 ] ] ++;
+               }
+       }
+       
        /**
         *
         */

[4/5] incubator-systemml git commit: [SYSTEMML-382] Initial runtime integration sparse matrix blocks

Reply via email to