[SYSTEMML-2314] Recompiler extensions for improved list-matrix lookups. This patch introduces additional rewrites to better handle lists during dynamic recompilation. When compiling DAGs with as.matrix(X) or as.matrix(X[5]), where X is a list, the matrix dimensions of the output are unknown because they depend on the selected data object. However, during dynamic recompilation of individual DAGs we often have 1-entry lists or known index expressions, which allows us to replace these patterns with a direct reference to the selected matrix. This in turn allows compiling the subsequent operations in the same DAG with known sizes and thus avoids unnecessary distributed operations.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e2875cae Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e2875cae Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e2875cae Branch: refs/heads/master Commit: e2875cae4fe9e367113f80b25f579489819d04fc Parents: ae86c3f Author: Matthias Boehm <[email protected]> Authored: Thu May 10 20:32:21 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu May 10 20:32:48 2018 -0700 ---------------------------------------------------------------------- .../hops/recompile/LiteralReplacement.java | 49 +++++++++++++++++++- .../java/org/apache/sysml/lops/compile/Dag.java | 5 +- .../functions/misc/ListAndStructTest.java | 5 ++ 3 files changed, 55 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/e2875cae/src/main/java/org/apache/sysml/hops/recompile/LiteralReplacement.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/recompile/LiteralReplacement.java b/src/main/java/org/apache/sysml/hops/recompile/LiteralReplacement.java index 575e187..7c5014c 100644 --- a/src/main/java/org/apache/sysml/hops/recompile/LiteralReplacement.java +++ b/src/main/java/org/apache/sysml/hops/recompile/LiteralReplacement.java @@ -34,11 +34,14 @@ import org.apache.sysml.hops.Hop.DataOpTypes; import org.apache.sysml.hops.Hop.Direction; import org.apache.sysml.hops.Hop.OpOp1; import org.apache.sysml.hops.rewrite.HopRewriteUtils; +import org.apache.sysml.lops.compile.Dag; import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; import 
org.apache.sysml.runtime.instructions.cp.Data; +import org.apache.sysml.runtime.instructions.cp.ListObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.instructions.cp.ScalarObjectFactory; import org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ -49,7 +52,7 @@ public class LiteralReplacement //internal configuration parameters private static final long REPLACE_LITERALS_MAX_MATRIX_SIZE = 1000000; //10^6 cells (8MB) - private static final boolean REPORT_LITERAL_REPLACE_OPS_STATS = true; + private static final boolean REPORT_LITERAL_REPLACE_OPS_STATS = true; protected static void rReplaceLiterals( Hop hop, LocalVariableMap vars, boolean scalarsOnly ) { @@ -73,6 +76,8 @@ public class LiteralReplacement lit = (lit==null) ? replaceLiteralValueTypeCastRightIndexing(c, vars) : lit; lit = (lit==null) ? replaceLiteralFullUnaryAggregate(c, vars) : lit; lit = (lit==null) ? replaceLiteralFullUnaryAggregateRightIndexing(c, vars) : lit; + lit = (lit==null) ? replaceTReadMatrixFromList(c, vars) : lit; + lit = (lit==null) ? 
replaceTReadMatrixLookupFromList(c, vars) : lit; } //replace hop w/ literal on demand @@ -342,7 +347,47 @@ public class LiteralReplacement return ret; } - + + private static DataOp replaceTReadMatrixFromList( Hop c, LocalVariableMap vars ) { + //pattern: as.matrix(X) or as.matrix(X) with X being a list + DataOp ret = null; + if( HopRewriteUtils.isUnary(c, OpOp1.CAST_AS_MATRIX) ) { + Hop in = c.getInput().get(0); + if( in.getDataType() == DataType.LIST + && HopRewriteUtils.isData(in, DataOpTypes.TRANSIENTREAD) ) { + ListObject list = (ListObject)vars.get(in.getName()); + String varname = Dag.getNextUniqueVarname(DataType.MATRIX); + MatrixObject mo = (MatrixObject) list.slice(0); + vars.put(varname, mo); + ret = HopRewriteUtils.createTransientRead(varname, c); + } + } + return ret; + } + + private static DataOp replaceTReadMatrixLookupFromList( Hop c, LocalVariableMap vars ) { + //pattern: as.matrix(X[i:i]) or as.matrix(X['a','a']) with X being a list + DataOp ret = null; + if( HopRewriteUtils.isUnary(c, OpOp1.CAST_AS_MATRIX) + && c.getInput().get(0) instanceof IndexingOp ) { + Hop ix = c.getInput().get(0); + Hop ixIn = c.getInput().get(0).getInput().get(0); + if( ixIn.getDataType() == DataType.LIST + && HopRewriteUtils.isData(ixIn, DataOpTypes.TRANSIENTREAD) + && ix.getInput().get(1) instanceof LiteralOp + && ix.getInput().get(2) instanceof LiteralOp + && ix.getInput().get(1) == ix.getInput().get(2) ) { + ListObject list = (ListObject)vars.get(ixIn.getName()); + String varname = Dag.getNextUniqueVarname(DataType.MATRIX); + LiteralOp lit = (LiteralOp) ix.getInput().get(1); + MatrixObject mo = (MatrixObject) ((lit.getValueType() == ValueType.STRING) ? 
+ list.slice(lit.getName()) : list.slice((int)lit.getLongValue()-1)); + vars.put(varname, mo); + ret = HopRewriteUtils.createTransientRead(varname, c); + } + } + return ret; + } /////////////////////////////// // Utility functions http://git-wip-us.apache.org/repos/asf/systemml/blob/e2875cae/src/main/java/org/apache/sysml/lops/compile/Dag.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java b/src/main/java/org/apache/sysml/lops/compile/Dag.java index 3244a3b..bc080d3 100644 --- a/src/main/java/org/apache/sysml/lops/compile/Dag.java +++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java @@ -212,8 +212,9 @@ public class Dag<N extends Lop> } public static String getNextUniqueVarname(DataType dt) { - return (dt==DataType.MATRIX ? Lop.MATRIX_VAR_NAME_PREFIX : - Lop.FRAME_VAR_NAME_PREFIX) + var_index.getNextID(); + return (dt.isMatrix() ? Lop.MATRIX_VAR_NAME_PREFIX : + dt.isFrame() ? Lop.FRAME_VAR_NAME_PREFIX : + Lop.SCALAR_VAR_NAME_PREFIX) + var_index.getNextID(); } /////// http://git-wip-us.apache.org/repos/asf/systemml/blob/e2875cae/src/test/java/org/apache/sysml/test/integration/functions/misc/ListAndStructTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/misc/ListAndStructTest.java b/src/test/java/org/apache/sysml/test/integration/functions/misc/ListAndStructTest.java index 0d3707c..4c33847 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/misc/ListAndStructTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/misc/ListAndStructTest.java @@ -29,6 +29,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; import org.apache.sysml.test.integration.AutomatedTestBase; import org.apache.sysml.test.integration.TestConfiguration; import org.apache.sysml.test.utils.TestUtils; +import org.apache.sysml.utils.Statistics; public 
class ListAndStructTest extends AutomatedTestBase { @@ -114,6 +115,10 @@ public class ListAndStructTest extends AutomatedTestBase HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R"); HashMap<CellIndex, Double> rfile = readRMatrixFromFS("R"); Assert.assertEquals(dmlfile.get(new CellIndex(1,1)), rfile.get(new CellIndex(1,1))); + + //check for properly compiled CP operations + Assert.assertTrue(Statistics.getNoOfExecutedMRJobs()==0); + Assert.assertTrue(Statistics.getNoOfExecutedSPInst()==0); } finally { OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
