Repository: systemml Updated Branches: refs/heads/master e25800f04 -> f3e3bdd78
[SYSTEMML-2046] Large dense blocks in compressed linear algebra ops This patch modifies all compressed linear algebra operations that potentially deal with large dense blocks to properly use the new dense block abstraction. Specifically this only applied to row sums and row sumSq implementations---where output vectors might be large blocks due to corrections---of all encoding formats. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f3e3bdd7 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f3e3bdd7 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f3e3bdd7 Branch: refs/heads/master Commit: f3e3bdd7882a7d2eae4c672963ff8d6e90ac2b8f Parents: e25800f Author: Matthias Boehm <[email protected]> Authored: Sun Dec 31 21:23:22 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sun Dec 31 21:23:22 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/compress/ColGroupDDC1.java | 27 +++++++----- .../sysml/runtime/compress/ColGroupDDC2.java | 14 +++--- .../sysml/runtime/compress/ColGroupOLE.java | 45 +++++++++++--------- .../sysml/runtime/compress/ColGroupOffset.java | 4 +- .../sysml/runtime/compress/ColGroupRLE.java | 38 ++++++++++------- 5 files changed, 75 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/f3e3bdd7/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java index e14f27f..117a1d3 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC1.java @@ -30,6 +30,7 @@ import org.apache.sysml.runtime.compress.utils.ConverterUtils; import org.apache.sysml.runtime.functionobjects.KahanFunction; import org.apache.sysml.runtime.functionobjects.KahanPlus; import org.apache.sysml.runtime.instructions.cp.KahanObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.operators.ScalarOperator; @@ -162,7 +163,7 @@ public class ColGroupDDC1 extends ColGroupDDC public long getExactSizeOnDisk() { long ret = 12; //header //col indices - ret += 4 * _colIndexes.length; + ret += 4 * _colIndexes.length; //distinct values (groups of values) ret += 8 * _values.length; //data @@ -340,9 +341,10 @@ public class ColGroupDDC1 extends ColGroupDDC @Override protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) { + //note: due to corrections the output might be a large dense block + DenseBlock c = result.getDenseBlock(); KahanObject kbuff = new KahanObject(0, 0); KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject(); - double[] c = result.getDenseBlockValues(); //pre-aggregate nnz per value tuple double[] vals = sumAllValues(kplus, kbuff, false); @@ -350,19 +352,22 @@ public class ColGroupDDC1 extends ColGroupDDC //scan data and add to result (use kahan plus not general KahanFunction //for correctness in case of sqk+) for( int i=rl; i<ru; i++ ) { - kbuff.set(c[2*i], c[2*i+1]); + double[] cvals = c.values(i); + int cix = c.pos(i); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, vals[_data[i]&0xFF]); - c[2*i] = kbuff._sum; - c[2*i+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } } public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) throws DMLRuntimeException { + //note: due to corrections the output might be a large dense block + DenseBlock c = result.getDenseBlock(); KahanObject kbuff = new KahanObject(0, 0); KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject(); - double[] c = result.getDenseBlockValues(); //prepare distinct values once double[][] vals = new double[grps.length][]; @@ -387,11 +392,13 @@ public class ColGroupDDC1 extends ColGroupDDC } //add partial results of all ddc groups for( int i=bi; i<Math.min(bi+blksz, ru); i++ ) { - kbuff.set(c[2*i], c[2*i+1]); + double[] cvals = c.values(i); + int cix = c.pos(i); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, tmpAgg[i-bi]); - c[2*i] = kbuff._sum; - c[2*i+1] = kbuff._correction; - } + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; + } } } http://git-wip-us.apache.org/repos/asf/systemml/blob/f3e3bdd7/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java index b046350..6303ff0 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupDDC2.java @@ -29,6 +29,7 @@ import org.apache.sysml.runtime.compress.utils.ConverterUtils; import org.apache.sysml.runtime.functionobjects.KahanFunction; import org.apache.sysml.runtime.functionobjects.KahanPlus; import org.apache.sysml.runtime.instructions.cp.KahanObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.operators.ScalarOperator; @@ -298,7 +299,7 @@ public class ColGroupDDC2 extends ColGroupDDC postScaling(vals, c); } else //general case - { + { //iterate over codes, compute all, and add to the result for( int i=0; i<nrow; i++ ) { double aval = a.getData(i, 0); @@ -339,9 +340,10 @@ public class ColGroupDDC2 extends ColGroupDDC @Override protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) { + //note: due to corrections the output might be a large dense block + DenseBlock c = result.getDenseBlock(); KahanObject kbuff = new KahanObject(0, 0); KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject(); - double[] c = result.getDenseBlockValues(); //pre-aggregate nnz per value tuple double[] vals = sumAllValues(kplus, kbuff, false); @@ -349,10 +351,12 @@ public class ColGroupDDC2 extends ColGroupDDC //scan data and add to result (use kahan plus not general KahanFunction //for correctness in case of sqk+) for( int i=rl; i<ru; i++ ) { - kbuff.set(c[2*i], c[2*i+1]); + double[] cvals = c.values(i); + int cix = c.pos(i); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, vals[_data[i]]); - c[2*i] = kbuff._sum; - c[2*i+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/f3e3bdd7/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java index b655804..e466369 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java @@ -31,6 +31,7 @@ import org.apache.sysml.runtime.functionobjects.Builtin; import org.apache.sysml.runtime.functionobjects.KahanFunction; import org.apache.sysml.runtime.functionobjects.KahanPlus; import org.apache.sysml.runtime.instructions.cp.KahanObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.operators.ScalarOperator; @@ -92,7 +93,7 @@ public class ColGroupOLE extends ColGroupOffset bix += _data[boff+bix] + 1; } _skiplist[k] = bix; - } + } } //debug output @@ -124,12 +125,12 @@ public class ColGroupOLE extends ColGroupOffset //cache blocking config and position array int[] apos = skipScan(numVals, rl); - + //cache conscious append via horizontal scans for( int bi=rl; bi<ru; bi+=blksz ) { for (int k = 0, off=0; k < numVals; k++, off+=numCols) { int boff = _ptr[k]; - int blen = len(k); + int blen = len(k); int bix = apos[k]; if( bix >= blen ) continue; @@ -161,18 +162,18 @@ public class ColGroupOLE extends ColGroupOffset final int n = getNumRows(); //cache blocking config and position array - int[] apos = new int[numVals]; + int[] apos = new int[numVals]; int[] cix = new int[numCols]; //prepare target col indexes for( int j=0; j<numCols; j++ ) cix[j] = colixTargets[_colIndexes[j]]; - //cache conscious append via horizontal scans + //cache conscious append via horizontal scans for( int bi=0; bi<n; bi+=blksz ) { for (int k = 0, off=0; k < numVals; k++, off+=numCols) { int boff = _ptr[k]; - int blen = len(k); + int blen = len(k); int bix = apos[k]; if( bix >= blen ) continue; @@ -282,12 +283,12 @@ public class ColGroupOLE extends ColGroupOffset applyScalarOp(op), _data, _ptr); } - double[] rvalues = applyScalarOp(op, val0, getNumCols()); + double[] rvalues = applyScalarOp(op, val0, getNumCols()); char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff, loff.length); char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length); System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1); - rbitmapOffs[rbitmapOffs.length-1] = rbitmaps.length; + rbitmapOffs[rbitmapOffs.length-1] = rbitmaps.length; return new ColGroupOLE(_colIndexes, _numRows, loff.length<_numRows, rvalues, rbitmaps, rbitmapOffs); @@ -368,7 +369,7 @@ public class ColGroupOLE extends ColGroupOffset if( rl > 0 ){ for (; bix<blen & off<rl; bix += slen+1, off += blksz) { slen = _data[boff+bix]; - } + } } //compute partial results @@ -435,7 +436,7 @@ public class ColGroupOLE extends ColGroupOffset //step 3: scale partial results by values and write to global output for (int k = 0, valOff=0; k < numVals; k++, valOff+=numCols) for( int j = 0; j < numCols; j++ ) - c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j]; + c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j]; } else { @@ -513,12 +514,12 @@ public class ColGroupOLE extends ColGroupOffset @Override protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) { + //note: due to corrections the output might be a large dense block + DenseBlock c = result.getDenseBlock(); KahanObject kbuff = new KahanObject(0, 0); KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject(); - final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; final int numVals = getNumValues(); - double[] c = result.getDenseBlockValues(); if( ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz ) @@ -549,10 +550,12 @@ public class ColGroupOLE extends ColGroupOffset //compute partial results for (int i = 0; i < len; i++) { int rix = ii + _data[pos + i]; - kbuff.set(c[2*rix], c[2*rix+1]); + double[] cvals = c.values(rix); + int cix = c.pos(rix); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, val); - c[2*rix] = kbuff._sum; - c[2*rix+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } bix += len + 1; } @@ -579,10 +582,12 @@ public class ColGroupOLE extends ColGroupOffset slen = _data[boff+bix]; for (int i = 1; i <= slen; i++) { int rix = off + _data[boff+bix + i]; - kbuff.set(c[2*rix], c[2*rix+1]); + double[] cvals = c.values(rix); + int cix = c.pos(rix); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, val); - c[2*rix] = kbuff._sum; - c[2*rix+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } } } @@ -735,7 +740,7 @@ public class ColGroupOLE extends ColGroupOffset int[] ret = allocIVector(numVals, rl==0); final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; - if( rl > 0 ) { //rl aligned with blksz + if( rl > 0 ) { //rl aligned with blksz int rskip = (getNumRows()/2/blksz)*blksz; for( int k = 0; k < numVals; k++ ) { @@ -756,7 +761,7 @@ public class ColGroupOLE extends ColGroupOffset private int skipScanVal(int k, int rl) { final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; - if( rl > 0 ) { //rl aligned with blksz + if( rl > 0 ) { //rl aligned with blksz int rskip = (getNumRows()/2/blksz)*blksz; int boff = _ptr[k]; int blen = len(k); http://git-wip-us.apache.org/repos/asf/systemml/blob/f3e3bdd7/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java index f8b0b6b..3603b9e 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java @@ -309,7 +309,7 @@ public abstract class ColGroupOffset extends ColGroupValue //read bitmaps int totalLen = in.readInt(); _ptr = new int[numVals+1]; - _data = new char[totalLen]; + _data = new char[totalLen]; for( int i=0, off=0; i<numVals; i++ ) { int len = in.readInt(); _ptr[i] = off; @@ -343,7 +343,7 @@ public abstract class ColGroupOffset extends ColGroupValue int totalLen = 0; for( int i=0; i<numVals; i++ ) totalLen += len(i); - out.writeInt(totalLen); + out.writeInt(totalLen); for( int i=0; i<numVals; i++ ) { int len = len(i); int off = _ptr[i]; http://git-wip-us.apache.org/repos/asf/systemml/blob/f3e3bdd7/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java index fdce4f5..876012f 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java @@ -31,6 +31,7 @@ import org.apache.sysml.runtime.functionobjects.Builtin; import org.apache.sysml.runtime.functionobjects.KahanFunction; import org.apache.sysml.runtime.functionobjects.KahanPlus; import org.apache.sysml.runtime.instructions.cp.KahanObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.Pair; import org.apache.sysml.runtime.matrix.operators.ScalarOperator; @@ -107,7 +108,7 @@ public class ColGroupRLE extends ColGroupOffset //cache conscious append via horizontal scans for( int bi=rl; bi<ru; bi+=blksz ) { - int bimax = Math.min(bi+blksz, ru); + int bimax = Math.min(bi+blksz, ru); for (int k=0, off=0; k < numVals; k++, off+=numCols) { int boff = _ptr[k]; int blen = len(k); @@ -122,7 +123,7 @@ public class ColGroupRLE extends ColGroupOffset target.appendValue(i, _colIndexes[j], _values[off+j]); start += len; } - apos[k] = bix; + apos[k] = bix; astart[k] = start; } } @@ -155,7 +156,7 @@ public class ColGroupRLE extends ColGroupOffset //cache conscious append via horizontal scans for( int bi=0; bi<n; bi+=blksz ) { - int bimax = Math.min(bi+blksz, n); + int bimax = Math.min(bi+blksz, n); for (int k=0, off=0; k < numVals; k++, off+=numCols) { int boff = _ptr[k]; int blen = len(k); @@ -172,7 +173,7 @@ public class ColGroupRLE extends ColGroupOffset target.appendValue(i, cix[j], _values[off+j]); start += len; } - apos[k] = bix; + apos[k] = bix; astart[k] = start; } } @@ -333,8 +334,8 @@ public class ColGroupRLE extends ColGroupOffset int bix = 0; int start = 0; - //scan to beginning offset if necessary - if( rl > 0 ) { //rl aligned with blksz + //scan to beginning offset if necessary + if( rl > 0 ) { //rl aligned with blksz while( bix<blen ) { int lstart = _data[boff + bix]; //start int llen = _data[boff + bix + 1]; //len @@ -390,7 +391,7 @@ public class ColGroupRLE extends ColGroupOffset //horizontal scan, incl pos maintenance for (int k = 0; k < numVals; k++) { int boff = _ptr[k]; - int blen = len(k); + int blen = len(k); int bix = apos[k]; int start = astart[k]; @@ -491,7 +492,7 @@ public class ColGroupRLE extends ColGroupOffset applyScalarOp(op), _data, _ptr); } - double[] rvalues = applyScalarOp(op, val0, getNumCols()); + double[] rvalues = applyScalarOp(op, val0, getNumCols()); char[] lbitmap = BitmapEncoder.genRLEBitmap(loff, loff.length); char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length); System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); @@ -534,11 +535,12 @@ public class ColGroupRLE extends ColGroupOffset @Override protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) { + //note: due to corrections the output might be a large dense block + DenseBlock c = result.getDenseBlock(); KahanObject kbuff = new KahanObject(0, 0); KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject(); final int numVals = getNumValues(); - double[] c = result.getDenseBlockValues(); if( ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 @@ -573,10 +575,12 @@ public class ColGroupRLE extends ColGroupOffset int from = Math.max(bi, start+lstart); int to = Math.min(start+lstart+llen,bimax); for (int rix=from; rix<to; rix++) { - kbuff.set(c[2*rix], c[2*rix+1]); + double[] cvals = c.values(rix); + int cix = c.pos(rix); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, val); - c[2*rix] = kbuff._sum; - c[2*rix+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } if(start+lstart+llen >= bimax) break; @@ -605,10 +609,12 @@ public class ColGroupRLE extends ColGroupOffset curRunStartOff = curRunEnd + _data[boff+bix]; curRunEnd = curRunStartOff + _data[boff+bix+1]; for (int rix=curRunStartOff; rix<curRunEnd && rix<ru; rix++) { - kbuff.set(c[2*rix], c[2*rix+1]); + double[] cvals = c.values(rix); + int cix = c.pos(rix); + kbuff.set(cvals[cix], cvals[cix+1]); kplus2.execute2(kbuff, val); - c[2*rix] = kbuff._sum; - c[2*rix+1] = kbuff._correction; + cvals[cix] = kbuff._sum; + cvals[cix+1] = kbuff._correction; } } } @@ -770,7 +776,7 @@ public class ColGroupRLE extends ColGroupOffset int blen = len(k); int bix = 0; int start = 0; - while( bix<blen ) { + while( bix<blen ) { int lstart = _data[boff + bix]; //start int llen = _data[boff + bix + 1]; //len if( start+lstart+llen >= rl )
