[SYSTEMML-1836] Fix unnecessary GC overhead in codegen row operations

This patch fixes excessive garbage collection overhead in special cases of codegen row operations with relatively large intermediate rows and matrix side inputs, especially for multi-threaded operations. The major problem was that, for aggregations, the temporary output per thread was allocated with a size equal to the product of the two temporary vector sizes (as necessary for matrix multiplication outputs), which, however, is often completely unnecessary.
On a scenario of row operations for sum( (X != 0) * (UV - X) ^ 2) over small inputs of 10K x 3K, this patch improved performance for 10 iterations from 80s (74s GC) to 1.4s (0.074s GC). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/586f8229 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/586f8229 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/586f8229 Branch: refs/heads/master Commit: 586f82299292019583b68dd98ccb8fbb71a2fcd3 Parents: b6b6772 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sun Oct 15 21:50:17 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sun Oct 15 21:50:17 2017 -0700 ---------------------------------------------------------------------- .../sysml/runtime/codegen/LibSpoofPrimitives.java | 11 +++++++++-- .../apache/sysml/runtime/codegen/SpoofRowwise.java | 14 +++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/586f8229/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java index 8444b5f..7624d96 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java @@ -1829,9 +1829,9 @@ public class LibSpoofPrimitives private int _len2; public VectorBuffer(int num, int len1, int len2) { - int lnum = (len2 > 0) ? 2*num : num; + int lnum = (len2>0 && len1!=len2) ? 
2*num : num; _data = new double[lnum][]; - for( int i=0; i<num; i++ ) + for( int i=0; i<num; i++ ) { if( lnum > num ) { _data[2*i] = new double[len1]; _data[2*i+1] = new double[len2]; @@ -1839,6 +1839,7 @@ public class LibSpoofPrimitives else { _data[i] = new double[len1]; } + } _pos = -1; _len1 = len1; _len2 = len2; @@ -1851,5 +1852,11 @@ public class LibSpoofPrimitives } while( _data[_pos].length!=len ); return _data[_pos]; } + @SuppressWarnings("unused") + public boolean isReusable(int num, int len1, int len2) { + int lnum = (len2>0 && len1!=len2) ? 2*num : num; + return (_len1 == len1 && _len2 == len2 + && _data.length == lnum); + } } } http://git-wip-us.apache.org/repos/asf/systemml/blob/586f8229/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index d8a747b..9d5675b 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -209,8 +209,9 @@ public abstract class SpoofRowwise extends SpoofOperator if( _type.isColumnAgg() || _type == RowType.FULL_AGG ) { //execute tasks ArrayList<ParColAggTask> tasks = new ArrayList<>(); + int outLen = out.getNumRows() * out.getNumColumns(); for( int i=0; i<nk & i*blklen<m; i++ ) - tasks.add(new ParColAggTask(a, b, scalars, n, n2, i*blklen, Math.min((i+1)*blklen, m))); + tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, i*blklen, Math.min((i+1)*blklen, m))); List<Future<double[]>> taskret = pool.invokeAll(tasks); //aggregate partial results int len = _type.isColumnAgg() ? 
out.getNumRows()*out.getNumColumns() : 1; @@ -343,17 +344,16 @@ public abstract class SpoofRowwise extends SpoofOperator private final MatrixBlock _a; private final SideInput[] _b; private final double[] _scalars; - private final int _clen; - private final int _clen2; - private final int _rl; - private final int _ru; + private final int _clen, _clen2, _outLen; + private final int _rl, _ru; - protected ParColAggTask( MatrixBlock a, SideInput[] b, double[] scalars, int clen, int clen2, int rl, int ru ) { + protected ParColAggTask( MatrixBlock a, SideInput[] b, double[] scalars, int clen, int clen2, int outLen, int rl, int ru ) { _a = a; _b = b; _scalars = scalars; _clen = clen; _clen2 = clen2; + _outLen = outLen; _rl = rl; _ru = ru; } @@ -364,7 +364,7 @@ public abstract class SpoofRowwise extends SpoofOperator //allocate vector intermediates and partial output if( _reqVectMem > 0 ) LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2); - double[] c = new double[(_clen2>0)?_clen*_clen2 : _clen]; + double[] c = new double[_outLen]; if( _a instanceof CompressedMatrixBlock ) executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru);