[SYSTEMML-2145] Fix codegen row tmpl compiler/runtime for kmeans This patch fixes two issues - caused by recent changes of the codegen framework - that led to failures for large-scale kmeans runs. Specifically, this includes (1) a fix to retain dense output formats for incremental aggregation in spark codegen operations, and (2) a compiler modification regarding constant output sizes.
Furthermore, this also includes a hardening for sparse-unsafe row operations over empty inputs. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d7e4c7c5 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d7e4c7c5 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d7e4c7c5 Branch: refs/heads/master Commit: d7e4c7c5fec81842b3c96fdd1a6c3f2ea7d0fa00 Parents: 056e48d Author: Matthias Boehm <[email protected]> Authored: Sat Feb 10 18:25:28 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Feb 10 18:25:28 2018 -0800 ---------------------------------------------------------------------- .../sysml/hops/codegen/template/TemplateRow.java | 7 +++++-- .../apache/sysml/runtime/codegen/SpoofRowwise.java | 15 ++++++++++++--- .../instructions/spark/SpoofSPInstruction.java | 4 +++- 3 files changed, 20 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c7c5/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java index 7cb67d0..f405516 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java @@ -53,6 +53,7 @@ import org.apache.sysml.hops.Hop.OpOp1; import org.apache.sysml.hops.Hop.OpOp2; import org.apache.sysml.hops.Hop.OpOpN; import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType; import org.apache.sysml.runtime.matrix.data.LibMatrixMult; import org.apache.sysml.runtime.matrix.data.Pair; @@ -218,8 +219,10 @@ public class TemplateRow extends TemplateBase CNodeRow tpl = new 
CNodeRow(inputs, output); tpl.setRowType(TemplateUtils.getRowType(hop, inHops2.get("X"), inHops2.get("B1"))); - if( tpl.getRowType().isConstDim2(hop.getDim2()) ) - tpl.setConstDim2(hop.getDim2()); + long n2 = tpl.getRowType()==RowType.COL_AGG_B1 ? + hop.getDim1() : hop.getDim2(); + if( tpl.getRowType().isConstDim2(n2) ) + tpl.setConstDim2(n2); tpl.setNumVectorIntermediates(TemplateUtils .determineMinVectorIntermediates(output)); tpl.getOutput().resetVisitStatus(); http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c7c5/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index acc2e8d..b026b46 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -169,7 +169,8 @@ public abstract class SpoofRowwise extends SpoofOperator out = LibMatrixReorg.transpose(out, new MatrixBlock( out.getNumColumns(), out.getNumRows(), false)); } - out.examSparsity(); + if( !aggIncr ) + out.examSparsity(); return out; } @@ -289,8 +290,12 @@ public abstract class SpoofRowwise extends SpoofOperator } private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { - if( a == null ) + //forward empty block to sparse + if( a == null ) { + executeSparse(null, b, scalars, c, n, rl, ru); return; + } + SideInput[] lb = createSparseSideInputs(b, true); for( int i=rl; i<ru; i++ ) { genexec(a.values(i), a.pos(i), lb, scalars, @@ -314,8 +319,12 @@ public abstract class SpoofRowwise extends SpoofOperator } private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { - if( a.isEmptyBlock(false) ) + //forward empty block to sparse + if( a.isEmptyBlock(false) ) { + 
executeSparse(null, b, scalars, c, n, rl, ru); return; + } + SideInput[] lb = createSparseSideInputs(b, true); Iterator<double[]> iter = a.getDenseRowIterator(rl, ru); for( int i=rl; iter.hasNext(); i++ ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/d7e4c7c5/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java index 4022201..82b34df 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java @@ -477,8 +477,10 @@ public class SpoofSPInstruction extends SPInstruction { //cleanup and final result preparations LibSpoofPrimitives.cleanupThreadLocalMemory(); - if( aggIncr ) + if( aggIncr ) { + blkOut.examSparsity(); //deferred format change ret.add(new Tuple2<>(new MatrixIndexes(1,1), blkOut)); + } return ret.iterator(); }
