http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index f617f40..01dd8c0 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.commons.math3.util.FastMath; - import org.apache.sysml.lops.MapMultChain.ChainType; import org.apache.sysml.lops.WeightedCrossEntropy.WCeMMType; import org.apache.sysml.lops.WeightedDivMM.WDivMMType; @@ -1170,16 +1169,14 @@ public class LibMatrixMult final int blocksizeK = 32; //note: in contrast to dense-dense, no blocking over j (would require maintaining blocksizeK indexes, counter-productive on skew) - SparseRow[] b = m2.sparseBlock; + SparseBlock b = m2.sparseBlock; if( pm2 && m==1 ) //VECTOR-MATRIX { //parallelization over rows in rhs matrix for( int k=rl; k<ru; k++ ) - if( a[k] != 0 && b[k] != null && !b[k].isEmpty() ) { - int[] bix = b[k].getIndexContainer(); - double[] bvals = b[k].getValueContainer(); - vectMultiplyAdd(a[k], bvals, c, bix, 0, b[k].size()); + if( a[k] != 0 && !b.isEmpty(k) ) { + vectMultiplyAdd(a[k], b.values(k), c, b.indexes(k), b.pos(k), 0, b.size(k)); } } else //MATRIX-MATRIX @@ -1199,12 +1196,8 @@ public class LibMatrixMult for( int k = 0; k < bklen; k++ ) { double val = a[aixi+k]; - SparseRow brow = b[ bk+k ]; - if( val != 0 && brow != null && !brow.isEmpty() ) { - int blen = brow.size(); - int[] bix = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); - vectMultiplyAdd(val, bvals, c, bix, cixj, blen); + if( val != 0 && !b.isEmpty(bk+k) ) { + vectMultiplyAdd(val, b.values(bk+k), c, b.indexes(bk+k), b.pos(bk+k), cixj, b.size(bk+k)); } } } @@ -1213,19 +1206,20 @@ public class LibMatrixMult } else { + SparseBlock b = m2.sparseBlock; for( int i=rl, aix=rl*cd, cix=rl*n; i < ru; i++, cix+=n ) for(int k = 0; k < cd; k++, aix++ ) { double val = a[aix]; if( val!=0 ) { - SparseRow brow = m2.sparseBlock[ k ]; - if( brow != null && !brow.isEmpty() ) + if( !b.isEmpty(k) ) { - int blen = brow.size(); - int[] bix = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); - for(int j = 0; j < blen; j++) + int bpos = b.pos(k); + int blen = b.size(k); + int[] bix = b.indexes(k); + double[] bvals = b.values(k); + for(int j = bpos; j < bpos+blen; j++) c[cix+bix[j]] += val * bvals[j]; } } @@ -1251,43 +1245,32 @@ public class LibMatrixMult if( LOW_LEVEL_OPTIMIZATION ) { + SparseBlock a = m1.sparseBlock; + if( m==1 && n==1 ) //DOT PRODUCT { - SparseRow arow = m1.sparseBlock[0]; - if( arow != null && !arow.isEmpty() ) - { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - - c[0] = dotProduct(avals, b, aix, 0, alen); + if( !a.isEmpty(0) ) { + c[0] = dotProduct(a.values(0), b, a.indexes(0), a.pos(0), 0, a.size(0)); } } else if( n==1 ) //MATRIX-VECTOR { - for( int i=rl; i<Math.min(ru, m1.sparseBlock.length); i++ ) + for( int i=rl; i<Math.min(ru, a.numRows()); i++ ) { - SparseRow arow = m1.sparseBlock[i]; - if( arow != null && !arow.isEmpty() ) - { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - - c[i] = dotProduct(avals, b, aix, 0, alen); + if( !a.isEmpty(i) ) { + c[i] = dotProduct(a.values(i), b, a.indexes(i), a.pos(i), 0, a.size(i)); } } } else if( pm2 && m==1 ) //VECTOR-MATRIX { //parallelization over rows in rhs matrix - SparseRow arow = m1.sparseBlock[0]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(0) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); + int alen = a.size(0); + int[] aix = a.indexes(0); + double[] avals = a.values(0); + int rlix = (rl==0) ? 0 : a.posFIndexGTE(0,rl); rlix = (rlix>=0) ? rlix : alen; for( int k=rlix; k<alen && aix[k]<ru; k++ ) { @@ -1300,18 +1283,18 @@ public class LibMatrixMult } else if( pm2 && m<=16 ) //MATRIX-MATRIX (short lhs) { - SparseRow[] a = m1.sparseBlock; - for( int i=0, cix=0; i<a.length; i++, cix+=n ) - if( a[i] != null && !a[i].isEmpty() ) + for( int i=0, cix=0; i<a.numRows(); i++, cix+=n ) + if( !a.isEmpty(i) ) { - int alen = a[i].size(); - int[] aix = a[i].getIndexContainer(); - double[] avals = a[i].getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); - int k1 = (rl==0) ? 0 : a[i].searchIndexesFirstGTE(rl); - k1 = (k1>=0) ? k1 : alen; - int k2 = (ru==cd) ? alen : a[i].searchIndexesFirstGTE(ru); - k2 = (k2>=0) ? k2 : alen; + int k1 = (rl==0) ? apos : a.posFIndexGTE(i, rl); + k1 = (k1>=0) ? k1 : apos+alen; + int k2 = (ru==cd) ? apos+alen : a.posFIndexGTE(i, ru); + k2 = (k2>=0) ? k2 : apos+alen; //rest not aligned to blocks of 4 rows final int bn = (k2-k1) % 4; @@ -1330,14 +1313,14 @@ public class LibMatrixMult } else //MATRIX-MATRIX { - for( int i=rl, cix=rl*n; i<Math.min(ru, m1.sparseBlock.length); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) { - SparseRow arow = m1.sparseBlock[i]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(i) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); if( alen==1 && avals[0]==1 ) //ROW SELECTION { @@ -1349,13 +1332,13 @@ public class LibMatrixMult //rest not aligned to blocks of 4 rows final int bn = alen % 4; switch( bn ){ - case 1: vectMultiplyAdd(avals[0], b, c, aix[0]*n, cix, n); break; - case 2: vectMultiplyAdd2(avals[0],avals[1], b, c, aix[0]*n, aix[1]*n, cix, n); break; - case 3: vectMultiplyAdd3(avals[0],avals[1],avals[2], b, c, aix[0]*n, aix[1]*n, aix[2]*n, cix, n); break; + case 1: vectMultiplyAdd(avals[apos], b, c, aix[apos]*n, cix, n); break; + case 2: vectMultiplyAdd2(avals[apos],avals[apos+1], b, c, aix[apos]*n, aix[apos+1]*n, cix, n); break; + case 3: vectMultiplyAdd3(avals[apos],avals[apos+1],avals[apos+2], b, c, aix[apos]*n, aix[apos+1]*n, aix[apos+2]*n, cix, n); break; } //compute blocks of 4 rows (core inner loop) - for( int k = bn; k<alen; k+=4 ) { + for( int k = apos+bn; k<apos+alen; k+=4 ) { vectMultiplyAdd4( avals[k], avals[k+1], avals[k+2], avals[k+3], b, c, aix[k]*n, aix[k+1]*n, aix[k+2]*n, aix[k+3]*n, cix, n ); } @@ -1366,17 +1349,17 @@ public class LibMatrixMult } else { - for( int i=rl, cix=rl*n; i<Math.min(ru, m1.sparseBlock.length); i++, cix+=n ) + SparseBlock a = m1.sparseBlock; + for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) { - SparseRow arow = m1.sparseBlock[i]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(i) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); - for(int k = 0; k < alen; k++) - { + for(int k = apos; k < apos+alen; k++) { double val = avals[k]; for(int j = 0, bix=aix[k]*n; j < n; j++) c[cix+j] += val * b[bix+j]; @@ -1396,58 +1379,60 @@ public class LibMatrixMult private static void matrixMultSparseSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean pm2, int rl, int ru) throws DMLRuntimeException { - SparseRow[] b = m2.sparseBlock; + SparseBlock a = m1.sparseBlock; + SparseBlock b = m2.sparseBlock; double[] c = ret.denseBlock; int m = m1.rlen; int n = m2.clen; + //TODO perf sparse block + // MATRIX-MATRIX (VV, MV not applicable here because V always dense) if(LOW_LEVEL_OPTIMIZATION) { if( pm2 && m==1 ) //VECTOR-MATRIX { //parallelization over rows in rhs matrix - SparseRow arow = m1.sparseBlock[0]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(0) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); + int alen = a.size(0); + int[] aix = a.indexes(0); + double[] avals = a.values(0); + int rlix = (rl==0) ? 0 : a.posFIndexGTE(0,rl); rlix = (rlix>=0) ? rlix : alen; for( int k=rlix; k<alen && aix[k]<ru; k++ ) - if( b[aix[k]] != null && !b[aix[k]].isEmpty() ) { - SparseRow brow = b[aix[k]]; - int blen = brow.size(); - int[] bix = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); - vectMultiplyAdd(avals[k], bvals, c, bix, 0, blen); + if( !b.isEmpty(aix[k]) ) { + int bpos = b.pos(aix[k]); + int blen = b.size(aix[k]); + int[] bix = b.indexes(aix[k]); + double[] bvals = b.values(aix[k]); + vectMultiplyAdd(avals[k], bvals, c, bix, bpos, 0, blen); } } } else //MATRIX-MATRIX { - for( int i=rl, cix=rl*n; i<Math.min(ru, m1.sparseBlock.length); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) { - SparseRow arow = m1.sparseBlock[i]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(i) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); - for(int k = 0; k < alen; k++) + for(int k = apos; k < apos+alen; k++) { double val = avals[k]; - SparseRow brow = b[ aix[k] ]; - if( brow != null && !brow.isEmpty() ) + if( !b.isEmpty(aix[k]) ) { - int blen = brow.size(); - int[] bix = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); + int bpos = b.pos(aix[k]); + int blen = b.size(aix[k]); + int[] bix = b.indexes(aix[k]); + double[] bvals = b.values(aix[k]); - vectMultiplyAdd(val, bvals, c, bix, cix, blen); + vectMultiplyAdd(val, bvals, c, bix, bpos, cix, blen); } } } @@ -1456,25 +1441,25 @@ public class LibMatrixMult } else { - for( int i=rl, cix=rl*n; i<Math.min(ru, m1.sparseBlock.length); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) { - SparseRow arow = m1.sparseBlock[i]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(i) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); - for(int k = 0; k < alen; k++) + for(int k = apos; k < apos+alen; k++) { double val = avals[k]; - SparseRow brow = m2.sparseBlock[ aix[k] ]; - if( brow != null && !brow.isEmpty() ) + if( !b.isEmpty(aix[k]) ) { - int blen = brow.size(); - int[] bix = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); - for(int j = 0; j < blen; j++) + int bpos = b.pos(aix[k]); + int blen = b.size(aix[k]); + int[] bix = b.indexes(aix[k]); + double[] bvals = b.values(aix[k]); + for(int j = bpos; j < bpos+blen; j++) c[cix+bix[j]] += val * bvals[j]; } } @@ -1499,6 +1484,8 @@ public class LibMatrixMult private static void matrixMultUltraSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru) throws DMLRuntimeException { + //TODO perf sparse block, consider iterators + boolean leftUS = m1.isUltraSparse(); final int m = m1.rlen; final int cd = m1.clen; @@ -1506,26 +1493,27 @@ public class LibMatrixMult if( leftUS ) //left is ultra-sparse (IKJ) { + SparseBlock a = m1.sparseBlock; boolean rightSparse = m2.sparse; for( int i=rl; i<ru; i++ ) { - SparseRow arow = m1.sparseBlock[ i ]; - if( arow != null && !arow.isEmpty() ) + if( !a.isEmpty(i) ) { - int alen = arow.size(); - int[] aixs = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int apos = a.pos(i); + int alen = a.size(i); + int[] aixs = a.indexes(i); + double[] avals = a.values(i); - if( alen==1 && avals[0]==1 ) //ROW SELECTION (no aggregation) + if( alen==1 && avals[apos]==1 ) //ROW SELECTION (no aggregation) { - int aix = aixs[0]; + int aix = aixs[apos]; if( rightSparse ) { //sparse right matrix (full row copy) - if( m2.sparseBlock!=null && m2.sparseBlock[aix]!=null ) { + if( !m2.sparseBlock.isEmpty(aix) ) { ret.rlen=m; ret.allocateSparseRowsBlock(false); //allocation on demand - ret.sparseBlock[i] = new SparseRow(m2.sparseBlock[aix]); - ret.nonZeros += ret.sparseBlock[i].size(); + ret.sparseBlock.set(i, new SparseRow(m2.sparseBlock.get(aix))); + ret.nonZeros += ret.sparseBlock.size(i); } } else { //dense right matrix (append all values) @@ -1535,7 +1523,7 @@ public class LibMatrixMult } else //GENERAL CASE { - for( int k=0; k<alen; k++ ) + for( int k=apos; k<apos+alen; k++ ) { double aval = avals[k]; int aix = aixs[k]; @@ -1553,15 +1541,17 @@ public class LibMatrixMult } else //right is ultra-sparse (KJI) { + SparseBlock b = m2.sparseBlock; + for(int k = 0; k < cd; k++ ) { - SparseRow brow = m2.sparseBlock[ k ]; - if( brow != null && !brow.isEmpty() ) + if( !b.isEmpty(k) ) { - int blen = brow.size(); - int[] bixs = brow.getIndexContainer(); - double[] bvals = brow.getValueContainer(); - for( int j=0; j<blen; j++ ) + int bpos = b.pos(k); + int blen = b.size(k); + int[] bixs = b.indexes(k); + double[] bvals = b.values(k); + for( int j=bpos; j<bpos+blen; j++ ) { double bval = bvals[j]; int bix = bixs[j]; @@ -1647,7 +1637,7 @@ public class LibMatrixMult */ private static void matrixMultChainSparse(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, ChainType ct, int rl, int ru) { - SparseRow[] a = mX.sparseBlock; + SparseBlock a = mX.sparseBlock; double[] b = mV.denseBlock; double[] w = (mW!=null) ? mW.denseBlock : null; double[] c = ret.denseBlock; @@ -1667,12 +1657,10 @@ public class LibMatrixMult //compute 1st matrix-vector for row block for( int j=0; j < tmplen; j++) { - SparseRow arow = a[bi+j]; - if( arow != null && !arow.isEmpty() ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - tmp[j] = dotProduct(avals, b, aix, 0, alen); + if( !a.isEmpty(bi+j) ) { + int apos = a.pos(bi+j); + int alen = a.size(bi+j); + tmp[j] = dotProduct(a.values(bi+j), b, a.indexes(bi+j), apos, 0, alen); } } @@ -1684,12 +1672,12 @@ public class LibMatrixMult //compute 2nd matrix vector for row block and aggregate for( int j=0; j < tmplen; j++) { - SparseRow arow = a[bi+j]; - if( arow != null && !arow.isEmpty() && tmp[j] != 0 ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - vectMultiplyAdd(tmp[j], avals, c, aix, 0, alen); + if( !a.isEmpty(bi+j) && tmp[j] != 0 ) { + int apos = a.pos(bi+j); + int alen = a.size(bi+j); + int[] aix = a.indexes(bi+j); + double[] avals = a.values(bi+j); + vectMultiplyAdd(tmp[j], avals, c, aix, apos, 0, alen); } } } @@ -1848,6 +1836,7 @@ public class LibMatrixMult { //2) transpose self matrix multiply sparse // (compute only upper-triangular matrix due to symmetry) + SparseBlock a = m1.sparseBlock; double[] c = ret.denseBlock; int m = m1.rlen; int n = m1.clen; @@ -1858,16 +1847,17 @@ public class LibMatrixMult //algorithm: scan rows, foreach row self join (KIJ) if( LOW_LEVEL_OPTIMIZATION ) { - for( SparseRow arow : m1.sparseBlock ) - if( arow != null && !arow.isEmpty() ) + for( int r=0; r<a.numRows(); r++ ) + if( !a.isEmpty(r) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); - rlix = (rlix>=0) ? rlix : alen; + int apos = a.pos(r); + int alen = a.size(r); + int[] aix = a.indexes(r); + double[] avals = a.values(r); + int rlix = (rl==0) ? apos : a.posFIndexGTE(r, rl); + rlix = (rlix>=0) ? rlix : apos+alen; - for(int i = rlix; i < alen && aix[i]<ru; i++) + for(int i = rlix; i < apos+alen && aix[i]<ru; i++) { double val = avals[i]; if( val != 0 ) { @@ -1879,20 +1869,21 @@ public class LibMatrixMult } else { - for( SparseRow arow : m1.sparseBlock ) - if( arow != null && !arow.isEmpty() ) + for( int r=0; r<a.numRows(); r++ ) + if( !a.isEmpty(r) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); - rlix = (rlix>=0) ? rlix : alen; + int apos = a.pos(r); + int alen = a.size(r); + int[] aix = a.indexes(r); + double[] avals = a.values(r); + int rlix = (rl==0) ? apos : a.posFIndexGTE(r, rl); + rlix = (rlix>=0) ? rlix : apos+alen; - for(int i = rlix; i < alen && aix[i]<ru; i++) + for(int i = rlix; i < apos+alen && aix[i]<ru; i++) { double val = avals[i]; if( val != 0 ) - for(int j = i, ix2 = aix[i]*n; j < alen; j++) + for(int j = i, ix2 = aix[i]*n; j < apos+alen; j++) c[ix2+aix[j]] += val * avals[j]; } } @@ -1902,11 +1893,9 @@ public class LibMatrixMult { if( m==1 ) //VECTOR { - SparseRow arow = m1.sparseBlock[0]; - if( arow !=null && !arow.isEmpty() ) - { - int alen = arow.size(); - double[] avals = arow.getValueContainer(); + if( !m1.sparseBlock.isEmpty(0) ) { + int alen = m1.sparseBlock.size(0); //pos always 0 + double[] avals = a.values(0); c[0] = dotProduct(avals, avals, alen); } } @@ -1921,16 +1910,17 @@ public class LibMatrixMult //algorithm: scan rows, foreach row self join (KIJ) if( LOW_LEVEL_OPTIMIZATION ) { - for( SparseRow arow : m1.sparseBlock ) - if( arow != null && !arow.isEmpty() ) + for( int r=0; r<a.numRows(); r++ ) + if( !a.isEmpty(r) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); - rlix = (rlix>=0) ? rlix : alen; + int apos = a.pos(r); + int alen = a.size(r); + int[] aix = a.indexes(r); + double[] avals = a.values(r); + int rlix = (rl==0) ? apos : a.posFIndexGTE(r, rl); + rlix = (rlix>=0) ? rlix : apos+alen; - for(int i = rlix; i < alen && aix[i]<ru; i++) + for(int i = rlix; i < apos+alen && aix[i]<ru; i++) { double val = avals[i]; if( val != 0 ) { @@ -1942,16 +1932,17 @@ public class LibMatrixMult } else { - for( SparseRow arow : m1.sparseBlock ) - if( arow != null && !arow.isEmpty() ) + for( int r=0; r<a.numRows(); r++ ) + if( !a.isEmpty(r) ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - int rlix = (rl==0) ? 0 : arow.searchIndexesFirstGTE(rl); - rlix = (rlix>=0) ? rlix : alen; + int apos = a.pos(r); + int alen = a.size(r); + int[] aix = a.indexes(r); + double[] avals = a.values(r); + int rlix = (rl==0) ? apos : a.posFIndexGTE(r,rl); + rlix = (rlix>=0) ? rlix : apos+alen; - for(int i = rlix; i < alen && aix[i]<ru; i++) + for(int i = rlix; i < apos+alen && aix[i]<ru; i++) { double val = avals[i]; if( val != 0 ) @@ -2023,7 +2014,7 @@ public class LibMatrixMult { double[] a = pm1.denseBlock; double[] b = m2.denseBlock; - SparseRow[] c = ret1.sparseBlock; + SparseBlock c = ret1.sparseBlock; final int n = m2.clen; final int brlen = ret1.getNumRows(); @@ -2048,9 +2039,8 @@ public class LibMatrixMult } //append entire dense row into sparse target position - c[bpos] = new SparseRow( n ); for( int j=0; j<n; j++ ) - c[bpos].append(j, b[bix+j]); + c.append(bpos, j, b[bix+j]); lastblk = blk; } } @@ -2069,8 +2059,8 @@ public class LibMatrixMult private static void matrixMultPermuteSparse( MatrixBlock pm1, MatrixBlock m2, MatrixBlock ret1, MatrixBlock ret2, int rl, int ru) { double[] a = pm1.denseBlock; - SparseRow[] b = m2.sparseBlock; - SparseRow[] c = ret1.sparseBlock; + SparseBlock b = m2.sparseBlock; + SparseBlock c = ret1.sparseBlock; final int brlen = ret1.getNumRows(); @@ -2093,8 +2083,7 @@ public class LibMatrixMult } //memcopy entire sparse row into target position - if( b[i] != null ) - c[bpos] = new SparseRow( b[i] ); + c.set(bpos, new SparseRow( b.get(i) )); lastblk = blk; } } @@ -2198,8 +2187,8 @@ public class LibMatrixMult */ private static void matrixMultWSLossSparseDense(MatrixBlock mX, MatrixBlock mU, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, WeightsType wt, int rl, int ru) { - SparseRow[] x = mX.sparseBlock; - SparseRow[] w = (mW!=null)? mW.sparseBlock : null; + SparseBlock x = mX.sparseBlock; + SparseBlock w = (mW!=null)? mW.sparseBlock : null; double[] u = mU.denseBlock; double[] v = mV.denseBlock; final int n = mX.clen; @@ -2211,11 +2200,12 @@ public class LibMatrixMult { // approach: iterate over W, point-wise in order to exploit sparsity for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - for( int k=0; k<wlen; k++ ) { + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); + for( int k=wpos; k<wpos+wlen; k++ ) { double xi = mX.quickGetValue(i, wix[k]); double uvij = dotProduct(u, v, uix, wix[k]*cd, cd); wsloss += wval[k]*(xi-uvij)*(xi-uvij); @@ -2227,11 +2217,12 @@ public class LibMatrixMult { // approach: iterate over W, point-wise in order to exploit sparsity for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) - if( x[i] != null && !x[i].isEmpty() ) { - int xlen = x[i].size(); - int[] xix = x[i].getIndexContainer(); - double[] xval = x[i].getValueContainer(); - for( int k=0; k<xlen; k++ ) { + if( !x.isEmpty(i) ) { + int xpos = x.pos(i); + int xlen = x.size(i); + int[] xix = x.indexes(i); + double[] xval = x.values(i); + for( int k=xpos; k<xpos+xlen; k++ ) { double uvij = dotProduct(u, v, uix, xix[k]*cd, cd); wsloss += (xval[k]-uvij)*(xval[k]-uvij); } @@ -2259,18 +2250,19 @@ public class LibMatrixMult // approach: iterate over all cells of X and for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) { - if( x[i]==null || x[i].isEmpty() ) { //empty row + if( x.isEmpty(i) ) { //empty row for( int j=0, vix=0; j<n; j++, vix+=cd) { double uvij = dotProduct(u, v, uix, vix, cd); wsloss += (-uvij)*(-uvij); } } else { //non-empty row - int xlen = x[i].size(); - int[] xix = x[i].getIndexContainer(); - double[] xval = x[i].getValueContainer(); + int xpos = x.pos(i); + int xlen = x.size(i); + int[] xix = x.indexes(i); + double[] xval = x.values(i); int last = -1; - for( int k=0; k<xlen; k++ ) { + for( int k=xpos; k<xpos+xlen; k++ ) { //process last nnz til current nnz for( int k2=last+1; k2<xix[k]; k2++ ){ double uvij = dotProduct(u, v, uix, k2*cd, cd); @@ -2316,14 +2308,15 @@ public class LibMatrixMult // approach: iterate over W, point-wise in order to exploit sparsity if( mW.sparse ) //SPARSE { - SparseRow[] wrows = mW.sparseBlock; + SparseBlock w = mW.sparseBlock; for( int i=rl; i<ru; i++ ) - if( wrows[i] != null && !wrows[i].isEmpty() ){ - int wlen = wrows[i].size(); - int[] wix = wrows[i].getIndexContainer(); - double[] wval = wrows[i].getValueContainer(); - for( int k=0; k<wlen; k++ ) { + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); + for( int k=wpos; k<wpos+wlen; k++ ) { double uvij = dotProductGeneric(mU, mV, i, wix[k], cd); double xi = mX.quickGetValue(i, wix[k]); wsloss += wval[k]*(xi-uvij)*(xi-uvij); @@ -2349,14 +2342,15 @@ public class LibMatrixMult // approach: iterate over W, point-wise in order to exploit sparsity if( mW.sparse ) //SPARSE { - SparseRow[] xrows = mX.sparseBlock; + SparseBlock x = mX.sparseBlock; for( int i=rl; i<ru; i++ ) - if( xrows[i] != null && !xrows[i].isEmpty() ){ - int xlen = xrows[i].size(); - int[] xix = xrows[i].getIndexContainer(); - double[] xval = xrows[i].getValueContainer(); - for( int k=0; k<xlen; k++ ) { + if( !x.isEmpty(i) ) { + int xpos = x.pos(i); + int xlen = x.size(i); + int[] xix = x.indexes(i); + double[] xval = x.values(i); + for( int k=xpos; k<xpos+xlen; k++ ) { double uvij = dotProductGeneric(mU, mV, i, xix[k], cd); wsloss += (xval[k]-uvij)*(xval[k]-uvij); } @@ -2469,11 +2463,10 @@ public class LibMatrixMult private static void matrixMultWSigmoidSparseDense(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WSigmoidType wt, int rl, int ru) throws DMLRuntimeException { - SparseRow[] w = mW.sparseBlock; - SparseRow[] c = ret.sparseBlock; + SparseBlock w = mW.sparseBlock; + SparseBlock c = ret.sparseBlock; double[] u = mU.denseBlock; double[] v = mV.denseBlock; - final int n = mW.clen; final int cd = mU.clen; boolean flagminus = (wt==WSigmoidType.MINUS || wt==WSigmoidType.LOG_MINUS); @@ -2481,15 +2474,16 @@ public class LibMatrixMult //approach: iterate over non-zeros of w, selective mm computation for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - c[i] = new SparseRow(wlen, n); + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); - for( int k=0; k<wlen; k++ ) { + c.allocate(i, wlen); + for( int k=wpos; k<wpos+wlen; k++ ) { double cval = wsigmoid(wval[k], u, v, uix, wix[k]*cd, flagminus, flaglog, cd); - c[i].append(wix[k], cval); + c.append(i, wix[k], cval); } } } @@ -2519,19 +2513,20 @@ public class LibMatrixMult if( mW.sparse ) //SPARSE { //w and c always in same representation - SparseRow[] w = mW.sparseBlock; - SparseRow[] c = ret.sparseBlock; + SparseBlock w = mW.sparseBlock; + SparseBlock c = ret.sparseBlock; for( int i=rl; i<ru; i++ ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - c[i] = new SparseRow(wlen, n); + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); - for( int k=0; k<wlen; k++ ) { + c.allocate(i, wlen); + for( int k=wpos; k<wpos+wlen; k++ ) { double cval = wsigmoid(wval[k], mU, mV, i, wix[k], flagminus, flaglog, cd); - c[i].append(wix[k], cval); + c.append(i, wix[k], cval); } } } @@ -2622,26 +2617,27 @@ public class LibMatrixMult final boolean minus = wt.isMinus(); final int cd = mU.clen; - SparseRow[] w = mW.sparseBlock; + SparseBlock w = mW.sparseBlock; double[] u = mU.denseBlock; double[] v = mV.denseBlock; double[] c = ret.denseBlock; //approach: iterate over non-zeros of w, selective mm computation for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) { - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); if( basic ) { - for( int k=0; k<wlen; k++ ) + for( int k=wpos; k<wpos+wlen; k++ ) ret.appendValue( i, wix[k], wval[k] * dotProduct(u, v, uix, wix[k]*cd, cd)); } else { //left/right minus default - int k = (cl==0) ? 0 : w[i].searchIndexesFirstGTE(cl); - k = (k>=0) ? k : wlen; - for( ; k<wlen && wix[k]<cu; k++ ) + int k = (cl==0) ? wpos : w.posFIndexGTE(i,cl); + k = (k>=0) ? k : wpos+wlen; + for( ; k<wpos+wlen && wix[k]<cu; k++ ) wdivmm(wval[k], u, v, c, uix, wix[k]*cd, left, mult, minus, cd); } } @@ -2676,16 +2672,17 @@ public class LibMatrixMult //approach: iterate over non-zeros of w, selective mm computation if( mW.sparse ) //SPARSE { - SparseRow[] w = mW.sparseBlock; + SparseBlock w = mW.sparseBlock; for( int i=rl; i<ru; i++ ) { - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - int k = (cl==0) ? 0 : w[i].searchIndexesFirstGTE(cl); - k = (k>=0) ? k : wlen; - for( ; k<wlen && wix[k]<cu; k++ ) { + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); + int k = (cl==0) ? wpos : w.posFIndexGTE(i,cl); + k = (k>=0) ? k : wpos+wlen; + for( ; k<wpos+wlen && wix[k]<cu; k++ ) { if( basic ) { double uvij = dotProductGeneric(mU,mV, i, wix[k], cd); ret.appendValue(i, wix[k], uvij); @@ -2769,7 +2766,7 @@ public class LibMatrixMult */ private static void matrixMultWCeMMSparseDense(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WCeMMType wt, int rl, int ru) { - SparseRow[] w = mW.sparseBlock; + SparseBlock w = mW.sparseBlock; double[] u = mU.denseBlock; double[] v = mV.denseBlock; final int cd = mU.clen; @@ -2777,11 +2774,12 @@ public class LibMatrixMult // approach: iterate over all cells of X and for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) { - if( w[i]!=null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - for( int k=0; k<wlen; k++ ) { + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); + for( int k=wpos; k<wpos+wlen; k++ ) { double uvij = dotProduct(u, v, uix, wix[k]*cd, cd); wceval += wval[k] * FastMath.log(uvij); } @@ -2811,14 +2809,15 @@ public class LibMatrixMult //approach: iterate over non-zeros of w, selective mm computation if( mW.sparse ) //SPARSE { - SparseRow[] w = mW.sparseBlock; + SparseBlock w = mW.sparseBlock; for( int i=rl; i<ru; i++ ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - for( int k=0; k<wlen; k++ ) { + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); + for( int k=wpos; k<wpos+wlen; k++ ) { double uvij = dotProductGeneric(mU, mV, i, wix[k], cd); wceval += wval[k] * FastMath.log(uvij); } @@ -2905,26 +2904,26 @@ public class LibMatrixMult private static void matrixMultWuMMSparseDense(MatrixBlock mW, MatrixBlock mU, MatrixBlock mV, MatrixBlock ret, WUMMType wt, ValueFunction fn, int rl, int ru) throws DMLRuntimeException { - SparseRow[] w = mW.sparseBlock; - SparseRow[] c = ret.sparseBlock; + SparseBlock w = mW.sparseBlock; + SparseBlock c = ret.sparseBlock; double[] u = mU.denseBlock; double[] v = mV.denseBlock; - final int n = mW.clen; final int cd = mU.clen; boolean flagmult = (wt==WUMMType.MULT); //approach: iterate over non-zeros of w, selective mm computation for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - c[i] = new SparseRow(wlen, n); + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); - for( int k=0; k<wlen; k++ ) { + c.allocate(i, wlen); + for( int k=wpos; k<wpos+wlen; k++ ) { double cval = wumm(wval[k], u, v, uix, wix[k]*cd, flagmult, fn, cd); - c[i].append(wix[k], cval); + c.append(i, wix[k], cval); } } } @@ -2953,19 +2952,20 @@ public class LibMatrixMult if( mW.sparse ) //SPARSE { //w and c always in same representation - SparseRow[] w = mW.sparseBlock; - SparseRow[] c = ret.sparseBlock; + SparseBlock w = mW.sparseBlock; + SparseBlock c = ret.sparseBlock; for( int i=rl; i<ru; i++ ) - if( w[i] != null && !w[i].isEmpty() ) { - int wlen = w[i].size(); - int[] wix = w[i].getIndexContainer(); - double[] wval = w[i].getValueContainer(); - c[i] = new SparseRow(wlen, n); + if( !w.isEmpty(i) ) { + int wpos = w.pos(i); + int wlen = w.size(i); + int[] wix = w.indexes(i); + double[] wval = w.values(i); - for( int k=0; k<wlen; k++ ) { + c.allocate(i, wlen); + for( int k=wpos; k<wpos+wlen; k++ ) { double cval = wumm(wval[k], mU, mV, i, wix[k], flagmult, fn, cd); - c[i].append(wix[k], cval); + c.append(i, wix[k], cval); } } } @@ -3064,17 +3064,17 @@ public class LibMatrixMult return val; } - private static double dotProduct( double[] a, double[] b, int[] aix, final int bi, final int len ) + private static double dotProduct( double[] a, double[] b, int[] aix, int ai, final int bi, final int len ) { double val = 0; final int bn = len%8; //compute rest - for( int i = 0; i < bn; i++ ) + for( int i = ai; i < ai+bn; i++ ) val += a[ i ] * b[ bi+aix[i] ]; //unrolled 8-block (for better instruction-level parallelism) - for( int i = bn; i < len; i+=8 ) + for( int i = ai+bn; i < ai+len; i+=8 ) { //read 64B cacheline of a //read 64B of b via 'gather' @@ -3250,6 +3250,7 @@ public class LibMatrixMult * @param ci * @param len */ + @SuppressWarnings("unused") private static void vectMultiplyAdd( final double aval, double[] b, double[] c, int[] bix, final int ci, final int len ) { final int bn = len%8;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java index c46e2c5..38df263 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixOuterAgg.java @@ -901,17 +901,17 @@ public class LibMatrixOuterAgg if( in.isEmptyBlock(false) ) return; - SparseRow[] aSparseRows = in.getSparseBlock(); - for (int j = 0; j < aSparseRows.length; ++j) - if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() ) - { - double [] aValues = aSparseRows[j].getValueContainer(); - int [] aIndexes = aSparseRows[j].getIndexContainer(); + SparseBlock sblock = in.getSparseBlock(); + for( int j = 0; j < sblock.numRows(); j++) + if( !sblock.isEmpty(j) ) { + int apos = sblock.pos(j); + int alen = sblock.size(j); + int[] aix = sblock.indexes(j); + double [] avals = sblock.values(j); - for (int i=0; i < aValues.length; ++i) - { - int cnt = sumRowSumGtLeColSumLtGe(aValues[i], bv, bOp); - out.quickSetValue(0, aIndexes[i], cnt); + for (int i=apos; i < apos+alen; i++) { + int cnt = sumRowSumGtLeColSumLtGe(avals[i], bv, bOp); + out.quickSetValue(0, aix[i], cnt); } } } @@ -961,17 +961,17 @@ public class LibMatrixOuterAgg if( in.isEmptyBlock(false) ) return; - SparseRow[] aSparseRows = in.getSparseBlock(); - for (int j = 0; j < aSparseRows.length; ++j) - if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() ) - { - double [] aValues = aSparseRows[j].getValueContainer(); - int [] aIndexes = aSparseRows[j].getIndexContainer(); + SparseBlock sblock = in.getSparseBlock(); + for (int j = 0; j < sblock.numRows(); j++) + if( !sblock.isEmpty(j) ) { + int apos = sblock.pos(j); + int alen = sblock.size(j); + int[] aix = sblock.indexes(j); + double [] avals = sblock.values(j); - for (int i=0; i < aValues.length; ++i) - { - int cnt = sumRowSumLtGeColSumGtLe(aValues[i], bv, bOp); - out.quickSetValue(0, aIndexes[i], cnt); + for (int i=apos; i < apos+alen; i++) { + int cnt = sumRowSumLtGeColSumGtLe(avals[i], bv, bOp); + out.quickSetValue(0, aix[i], cnt); } } } @@ -1021,17 +1021,17 @@ public class LibMatrixOuterAgg if( in.isEmptyBlock(false) ) return; - SparseRow[] aSparseRows = in.getSparseBlock(); - for (int j = 0; j < aSparseRows.length; ++j) - if( aSparseRows[j]!=null && !aSparseRows[j].isEmpty() ) - { - double [] aValues = aSparseRows[j].getValueContainer(); - int [] aIndexes = aSparseRows[j].getIndexContainer(); + SparseBlock sblock = in.getSparseBlock(); + for (int j = 0; j < sblock.numRows(); j++) + if( !sblock.isEmpty(j) ) { + int apos = sblock.pos(j); + int alen = sblock.size(j); + int[] aix = sblock.indexes(j); + double [] avals = sblock.values(j); - for (int i=0; i < aValues.length; ++i) - { - int cnt = sumEqNe(aValues[i], bv, bOp); - out.quickSetValue(0, aIndexes[i], cnt); + for (int i=apos; i < apos+alen; ++i) { + int cnt = sumEqNe(avals[i], bv, bOp); + out.quickSetValue(0, aix[i], cnt); } } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8ba0fdcc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java index 4ea4d1a..bdfe7c8 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java @@ -365,8 +365,8 @@ public class LibMatrixReorg out.allocateSparseRowsBlock(false); for( int i=0; i<rlen; i++ ) { int ix = vix[i]; - if( in.sparseBlock[ix]!=null && !in.sparseBlock[ix].isEmpty() ) { - out.sparseBlock[i] = new SparseRow(in.sparseBlock[ix]); + if( !in.sparseBlock.isEmpty(ix) ) { + out.sparseBlock.set(i, in.sparseBlock.get(ix)); } } } @@ -799,7 +799,7 @@ public class LibMatrixReorg out.allocateSparseRowsBlock(); double[] a = in.getDenseBlock(); - SparseRow[] c = out.getSparseBlock(); + SparseBlock c = out.getSparseBlock(); //blocking according to typical L2 cache sizes final int blocksizeI = 128; @@ -815,9 +815,8 @@ public class LibMatrixReorg for( int i=bi; i<bimin; i++ ) for( int j=bj, aix=i*n+bj; j<bjmin; j++, aix++ ) { - if( c[j] == null ) - c[j] = new SparseRow(ennz2,n2); - c[j].append(i, a[aix]); + c.allocate(j, ennz2, n2); + c.append(j, i, a[aix]); } } @@ -841,8 +840,8 @@ public class LibMatrixReorg out.reset(m2, n2, true); //always sparse out.allocateSparseRowsBlock(); - SparseRow[] a = in.getSparseBlock(); - SparseRow[] c = out.getSparseBlock(); + SparseBlock a = in.getSparseBlock(); + SparseBlock c = out.getSparseBlock(); //initial pass to determine capacity (this helps to prevent //sparse row reallocations and mem inefficiency w/ skew @@ -850,17 +849,18 @@ public class LibMatrixReorg if( n <= 4096 ) { //16KB cnt = new int[n]; for( int i=0; i<m; i++ ) { - if( a[i] !=null && !a[i].isEmpty() ) - countAgg(cnt, a[i].getIndexContainer(), a[i].size()); + if( !a.isEmpty(i) ) + countAgg(cnt, a.indexes(i), a.pos(i), a.size(i)); } } //allocate output sparse rows - if( cnt != null ) { - for( int i=0; i<m2; i++ ) - if( cnt[i] > 0 ) - c[i] = new SparseRow(cnt[i]); - } + //TODO perf sparse block + //if( cnt != null ) { + // for( int i=0; i<m2; i++ ) + // if( cnt[i] > 0 ) + // c[i] = new SparseRow(cnt[i]); + //} //blocking according to typical L2 cache sizes final int blocksizeI = 128; @@ -881,18 +881,15 @@ public class LibMatrixReorg //core transpose operation for( int i=bi, iix=0; i<bimin; i++, iix++ ) { - SparseRow arow = a[i]; - if( arow!=null && !arow.isEmpty() ) - { - int alen = arow.size(); - double[] avals = arow.getValueContainer(); - int[] aix = arow.getIndexContainer(); + if( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); int j = ix[iix]; //last block boundary - for( ; j<alen && aix[j]<bjmin; j++ ) - { - if( c[aix[j]] == null ) - c[aix[j]] = new SparseRow(ennz2,n2); - c[aix[j]].append(i, avals[j]); + for( ; j<alen && aix[j]<bjmin; j++ ) { + c.allocate(aix[apos+j], ennz2,n2); + c.append(aix[apos+j], i, avals[apos+j]); } ix[iix] = j; //keep block boundary } @@ -920,15 +917,14 @@ public class LibMatrixReorg out.reset(m2, n2, false); //always dense out.allocateDenseBlock(); - SparseRow[] a = in.getSparseBlock(); + SparseBlock a = in.getSparseBlock(); double[] c = out.getDenseBlock(); if( m==1 ) //ROW VECTOR TRANSPOSE { - SparseRow arow = a[0]; - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); + int alen = a.size(0); //always pos 0 + int[] aix = a.indexes(0); + double[] avals = a.values(0); for( int j=0; j<alen; j++ ) c[ aix[j] ] = avals[j]; } @@ -953,15 +949,14 @@ public class LibMatrixReorg //core transpose operation for( int i=bi, iix=0; i<bimin; i++, iix++ ) { - SparseRow arow = a[i]; - if( arow!=null && !arow.isEmpty() ) - { - int alen = arow.size(); - double[] avals = arow.getValueContainer(); - int[] aix = arow.getIndexContainer(); + if( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); int j = ix[iix]; //last block boundary - for( ; j<alen && aix[j]<bjmin; j++ ) - c[ aix[j]*n2+i ] = avals[ j ]; + for( ; j<alen && aix[apos+j]<bjmin; j++ ) + c[ aix[apos+j]*n2+i ] = avals[ apos+j ]; ix[iix] = j; //keep block boundary } } @@ -1051,13 +1046,13 @@ public class LibMatrixReorg out.allocateSparseRowsBlock(false); - SparseRow[] a = in.getSparseBlock(); - SparseRow[] c = out.getSparseBlock(); + SparseBlock a = in.getSparseBlock(); + SparseBlock c = out.getSparseBlock(); //copy all rows into target positions for( int i=0; i<m; i++ ) { - if( a[i] != null && !a[i].isEmpty() ) { - c[m-1-i] = new SparseRow(a[i]); + if( !a.isEmpty(i) ) { + c.set(m-1-i, a.get(i)); } } } @@ -1200,8 +1195,8 @@ public class LibMatrixReorg int estnnz = (int) (in.nonZeros/rows); //sparse reshape - SparseRow[] aRows = in.sparseBlock; - SparseRow[] cRows = out.sparseBlock; + SparseBlock a = in.sparseBlock; + SparseBlock c = out.sparseBlock; if( rowwise ) { @@ -1212,18 +1207,16 @@ public class LibMatrixReorg if( rows==1 ) //MATRIX->VECTOR { //note: cache-friendly on a and c; append-only - if( cRows[0] == null ) - cRows[0] = new SparseRow(estnnz, cols); - SparseRow crow = cRows[0]; + c.allocate(0, estnnz, cols); for( int i=0, cix=0; i<rlen; i++, cix+=clen ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ) { - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) - crow.append(cix+aix[j], avals[j]); + if( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) + c.append(0, cix+aix[j], avals[j]); } } } @@ -1235,18 +1228,17 @@ public class LibMatrixReorg for( int i=0; i<rlen; i++ ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) + if( !a.isEmpty(i) ){ + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) { int ci = (int)((cix+aix[j])/cols); int cj = (int)((cix+aix[j])%cols); - if( cRows[ci] == null ) - cRows[ci] = new SparseRow(estnnz, cols); - cRows[ci].append(cj, avals[j]); + c.allocate(ci, estnnz, cols); + c.append(ci, cj, avals[j]); } } @@ -1263,18 +1255,16 @@ public class LibMatrixReorg if( rlen==1 ) //VECTOR->MATRIX { //note: cache-friendly on a but not c; append-only - SparseRow arow = aRows[0]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) + if( !a.isEmpty(0) ){ + int alen = a.size(0); //always pos 0 + int[] aix = a.indexes(0); + double[] avals = a.values(0); + for( int j=0; j<alen; j++ ) { int ci = aix[j]%rows; int cj = aix[j]/rows; - if( cRows[ci] == null ) - cRows[ci] = new SparseRow(estnnz, cols); - cRows[ci].append(cj, avals[j]); + c.allocate(ci, estnnz, cols); + c.append(ci, cj, avals[j]); } } } @@ -1283,20 +1273,19 @@ public class LibMatrixReorg //note: cache-friendly on a but not c; append&sort, in-place w/o shifts for( int i=0; i<rlen; i++ ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) + if( !a.isEmpty(i) ){ + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) { //long tmpix because total cells in sparse can be larger than int long tmpix = (long)aix[j]*rlen+i; int ci = (int)(tmpix%rows); - int cj = (int)(tmpix/rows); - if( cRows[ci] == null ) - cRows[ci] = new SparseRow(estnnz, cols); - cRows[ci].append(cj, avals[j]); + int cj = (int)(tmpix/rows); + c.allocate(ci, estnnz, cols); + c.append(ci, cj, avals[j]); } } } @@ -1323,13 +1312,12 @@ public class LibMatrixReorg return; //allocate block if necessary - if(out.sparseBlock==null) - out.sparseBlock=new SparseRow[rows]; + out.allocateSparseRowsBlock(false); int estnnz = (int) (in.nonZeros/rows); //sparse reshape double[] a = in.denseBlock; - SparseRow[] cRows = out.sparseBlock; + SparseBlock c = out.sparseBlock; if( rowwise ) { @@ -1342,10 +1330,9 @@ public class LibMatrixReorg for( int j=0; j<cols; j++ ) { double val = a[aix++]; - if( val != 0 ){ - if( cRows[i] == null ) - cRows[i] = new SparseRow(estnnz, cols); - cRows[i].append(j, val); + if( val != 0 ) { + c.allocate(i, estnnz, cols); + c.append(i, j, val); } } } @@ -1361,10 +1348,9 @@ public class LibMatrixReorg for( int i=0; i<rows; i++ ) { double val = a[aix++]; - if( val != 0 ){ - if( cRows[i] == null ) - cRows[i] = new SparseRow(estnnz, cols); - cRows[i].append(j, val); + if( val != 0 ) { + c.allocate(i, estnnz, cols); + c.append(i, j, val); } } } @@ -1377,10 +1363,9 @@ public class LibMatrixReorg int ai = aix2%rlen; int aj = aix2/rlen; double val = a[ ai*clen+aj ]; - if( val != 0 ){ - if( cRows[i] == null ) - cRows[i] = new SparseRow(estnnz, cols); - cRows[i].append(j, val); + if( val != 0 ) { + c.allocate(i, estnnz, cols); + c.append(i, j, val); } } } @@ -1410,7 +1395,7 @@ public class LibMatrixReorg out.allocateDenseBlock(false); //sparse/dense reshape - SparseRow[] aRows = in.sparseBlock; + SparseBlock a = in.sparseBlock; double[] c = out.denseBlock; if( rowwise ) @@ -1422,12 +1407,12 @@ public class LibMatrixReorg //note: cache-friendly on a and c for( int i=0, cix=0; i<rlen; i++, cix+=clen ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) + if( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) c[cix+aix[j]] = avals[j]; } } @@ -1440,13 +1425,12 @@ public class LibMatrixReorg if( rlen==1 ) //VECTOR->MATRIX { //note: cache-friendly on a but not c - SparseRow arow = aRows[0]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) - { + if( !a.isEmpty(0) ){ + int apos = a.pos(0); + int alen = a.size(0); + int[] aix = a.indexes(0); + double[] avals = a.values(0); + for( int j=apos; j<apos+alen; j++ ) { int ci = aix[j]%rows; int cj = aix[j]/rows; c[ci*cols+cj] = avals[j]; @@ -1458,13 +1442,12 @@ public class LibMatrixReorg //note: cache-friendly on a but not c for( int i=0; i<rlen; i++ ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ){ - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) - { + if( !a.isEmpty(i) ){ + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) { int tmpix = aix[j]*rlen+i; int ci = tmpix%rows; int cj = tmpix/rows; @@ -1699,20 +1682,20 @@ public class LibMatrixReorg return; int rlen = in.rlen; - SparseRow[] aRows = in.sparseBlock; + SparseBlock a = in.sparseBlock; //append all values to right blocks MatrixIndexes ixtmp = new MatrixIndexes(); for( int i=0; i<rlen; i++ ) { - SparseRow arow = aRows[i]; - if( arow!=null && !arow.isEmpty() ) { + if( !a.isEmpty(i) ) { long ai = row_offset+i; - int alen = arow.size(); - int[] aix = arow.getIndexContainer(); - double[] avals = arow.getValueContainer(); - for( int j=0; j<alen; j++ ) - { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + + for( int j=apos; j<apos+alen; j++ ) { long aj = col_offset+aix[j]; computeResultBlockIndex(ixtmp, ai, aj, rows1, cols1, rows2, cols2, brlen2, bclen2, rowwise); MatrixBlock out = rix.get(ixtmp); @@ -1831,10 +1814,9 @@ public class LibMatrixReorg if( in.sparse ) //SPARSE { - SparseRow[] a = in.sparseBlock; - + SparseBlock a = in.sparseBlock; for ( int i=0; i < m; i++ ) - if ( a[i] != null && !a[i].isEmpty() ) { + if ( !a.isEmpty(i) ) { flags[i] = true; rlen2++; } @@ -1872,7 +1854,7 @@ public class LibMatrixReorg //note: output dense or sparse for( int i=0, cix=0; i<m; i++ ) if( flags[i] ) - ret.appendRow(cix++, in.sparseBlock[i]); + ret.appendRow(cix++, in.sparseBlock.get(i)); } else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE { @@ -1930,13 +1912,14 @@ public class LibMatrixReorg flags = new boolean[ n ]; //false if( in.sparse ) //SPARSE { - SparseRow[] a = in.sparseBlock; + SparseBlock a = in.sparseBlock; for( int i=0; i<m; i++ ) - if ( a[i] != null && !a[i].isEmpty() ) { - int alen = a[i].size(); - int[] aix = a[i].getIndexContainer(); - for( int j=0; j<alen; j++ ) + if ( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + for( int j=apos; j<apos+alen; j++ ) flags[ aix[j] ] = true; } } @@ -1978,14 +1961,15 @@ public class LibMatrixReorg if( in.sparse ) //* <- SPARSE { //note: output dense or sparse - SparseRow[] a = in.sparseBlock; + SparseBlock a = in.sparseBlock; for( int i=0; i<m; i++ ) - if ( a[i] != null && !a[i].isEmpty() ) { - int alen = a[i].size(); - int[] aix = a[i].getIndexContainer(); - double[] avals = a[i].getValueContainer(); - for( int j=0; j<alen; j++ ) + if ( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) if( flags[aix[j]] ) ret.appendValue(i, cix[aix[j]], avals[j]); } @@ -2199,6 +2183,7 @@ public class LibMatrixReorg * @param ai * @param len */ + @SuppressWarnings("unused") private static void countAgg( int[] c, int[] ai, final int len ) { final int bn = len%8; @@ -2221,6 +2206,28 @@ public class LibMatrixReorg } } + private static void countAgg( int[] c, int[] aix, int ai, final int len ) + { + final int bn = len%8; + + //compute rest, not aligned to 8-block + for( int i=ai; i<ai+bn; i++ ) + c[ aix[i] ]++; + + //unrolled 8-block (for better instruction level parallelism) + for( int i=ai+bn; i<ai+len; i+=8 ) + { + c[ aix[ i+0 ] ] ++; + c[ aix[ i+1 ] ] ++; + c[ aix[ i+2 ] ] ++; + c[ aix[ i+3 ] ] ++; + c[ aix[ i+4 ] ] ++; + c[ aix[ i+5 ] ] ++; + c[ aix[ i+6 ] ] ++; + c[ aix[ i+7 ] ] ++; + } + } + /** * */
