Repository: incubator-systemml Updated Branches: refs/heads/master a5584c0fd -> 3841ca88e
[SYSTEMML-694] Improved transpose-matmult lop compilation, for lstm Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7c5b83c1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7c5b83c1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7c5b83c1 Branch: refs/heads/master Commit: 7c5b83c1455baaf5e18d37587b1f709af8c1a8c7 Parents: a5584c0 Author: Matthias Boehm <[email protected]> Authored: Sat Jul 23 18:06:51 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat Jul 23 18:06:51 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/AggBinaryOp.java | 9 ++++++--- .../java/org/apache/sysml/hops/ReorgOp.java | 20 ++++++++++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c5b83c1/src/main/java/org/apache/sysml/hops/AggBinaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java index 0532d01..ea58ebd 100644 --- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java @@ -695,20 +695,23 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop { Hop X = getInput().get(0).getInput().get(0); //guaranteed to exists Hop Y = getInput().get(1); + int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); //right vector transpose - Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP); + Lop lY = Y.constructLops(); + Lop tY = (lY instanceof Transform && ((Transform)lY).getOperationType()==OperationTypes.Transpose ) ? + lY.getInputs().get(0) : //if input is already a transpose, avoid redundant transpose ops + new Transform(lY, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP, k); tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz()); setLineNumbers(tY); //matrix mult - int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); Lop mult = new Binary(tY, X.constructLops(), Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.CP, k); mult.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz()); setLineNumbers(mult); //result transpose (dimensions set outside) - Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP); + Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP, k); return out; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c5b83c1/src/main/java/org/apache/sysml/hops/ReorgOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java index d283d16..5f5138b 100644 --- a/src/main/java/org/apache/sysml/hops/ReorgOp.java +++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java @@ -31,6 +31,7 @@ import org.apache.sysml.lops.LopsException; import org.apache.sysml.lops.SortKeys; import org.apache.sysml.lops.Transform; import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.lops.Transform.OperationTypes; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; @@ -131,12 +132,19 @@ public class ReorgOp extends Hop implements MultiThreadedHop { case TRANSPOSE: { - int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); - Transform transform1 = new Transform( getInput().get(0).constructLops(), - HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k); - setOutputDimensions(transform1); - setLineNumbers(transform1); - setLops(transform1); + Lop lin = getInput().get(0).constructLops(); + if( lin instanceof Transform && ((Transform)lin).getOperationType()==OperationTypes.Transpose ) + setLops(lin.getInputs().get(0)); //if input is already a transpose, avoid redundant transpose ops + else if( getDim1()==1 && getDim2()==1 ) + setLops(lin); //if input of size 1x1, avoid unnecessary transpose + else { //general case + int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); + Transform transform1 = new Transform( lin, + HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k); + setOutputDimensions(transform1); + setLineNumbers(transform1); + setLops(transform1); + } break; } case DIAG:
