Group together types of e-wise multiply inputs. Comprehensive test.
The test tests all different kinds of objects multiplied togehter.
The new order of element-wise multiply chains is as follows:
<pre>
(((unknown * object * frame) * ([least-nnz-matrix * matrix] *
most-nnz-matrix))
* ([least-nnz-row-vector * row-vector] * most-nnz-row-vector))
* ([[scalars * least-nnz-col-vector] * col-vector] * most-nnz-col-vector)
</pre>
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/999fdfbc
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/999fdfbc
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/999fdfbc
Branch: refs/heads/master
Commit: 999fdfbca9ebd855e031e4b812b64f1b484a33d8
Parents: 6c3e1c5
Author: Dylan Hutchison <[email protected]>
Authored: Tue Jul 11 22:10:17 2017 -0700
Committer: Dylan Hutchison <[email protected]>
Committed: Tue Jul 11 22:10:17 2017 -0700
----------------------------------------------------------------------
...RewriteElementwiseMultChainOptimization.java | 122 ++++++++++++++---
...ElementwiseMultChainOptimizationAllTest.java | 134 +++++++++++++++++++
.../functions/misc/RewriteEMultChainOpAll.R | 37 +++++
.../functions/misc/RewriteEMultChainOpAll.dml | 31 +++++
4 files changed, 305 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/999fdfbc/src/main/java/org/apache/sysml/hops/rewrite/RewriteElementwiseMultChainOptimization.java
----------------------------------------------------------------------
diff --git
a/src/main/java/org/apache/sysml/hops/rewrite/RewriteElementwiseMultChainOptimization.java
b/src/main/java/org/apache/sysml/hops/rewrite/RewriteElementwiseMultChainOptimization.java
index 9cc8fcd..486072b 100644
---
a/src/main/java/org/apache/sysml/hops/rewrite/RewriteElementwiseMultChainOptimization.java
+++
b/src/main/java/org/apache/sysml/hops/rewrite/RewriteElementwiseMultChainOptimization.java
@@ -46,6 +46,14 @@ import com.google.common.collect.Multiset;
*
* Does not rewrite in the presence of foreign parents in the middle of the
e-wise multiply chain,
* since foreign parents may rely on the individual results.
+ *
+ * The new order of element-wise multiply chains is as follows:
+ * <pre>
+ * (((unknown * object * frame) * ([least-nnz-matrix * matrix] *
most-nnz-matrix))
+ * * ([least-nnz-row-vector * row-vector] * most-nnz-row-vector))
+ * * ([[scalars * least-nnz-col-vector] * col-vector] *
most-nnz-col-vector)
+ * </pre>
+ * Identical elements are replaced with powers.
*/
public class RewriteElementwiseMultChainOptimization extends HopRewriteRule {
@Override
@@ -137,14 +145,90 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
// Construct right-deep EMult tree
final Iterator<Map.Entry<Hop, Integer>> iterator =
sorted.entrySet().iterator();
- Hop first = constructPower(iterator.next());
- for (int i = 1; i < sorted.size(); i++) {
- final Hop second = constructPower(iterator.next());
- first = HopRewriteUtils.createBinary(second, first,
Hop.OpOp2.MULT);
- first.setVisited();
+ Hop next = iterator.hasNext() ? constructPower(iterator.next())
: null;
+ Hop colVectorsScalars = null;
+ while(next != null &&
+ (next.getDataType() ==
Expression.DataType.SCALAR
+ || next.getDataType() ==
Expression.DataType.MATRIX && next.getDim2() == 1))
+ {
+ if( colVectorsScalars == null )
+ colVectorsScalars = next;
+ else {
+ colVectorsScalars =
HopRewriteUtils.createBinary(next, colVectorsScalars, Hop.OpOp2.MULT);
+ colVectorsScalars.setVisited();
+ }
+ next = iterator.hasNext() ?
constructPower(iterator.next()) : null;
+ }
+ // next is not processed and is either null or past col vectors
+
+ Hop rowVectors = null;
+ while(next != null &&
+ (next.getDataType() ==
Expression.DataType.MATRIX && next.getDim1() == 1))
+ {
+ if( rowVectors == null )
+ rowVectors = next;
+ else {
+ rowVectors =
HopRewriteUtils.createBinary(rowVectors, next, Hop.OpOp2.MULT);
+ rowVectors.setVisited();
+ }
+ next = iterator.hasNext() ?
constructPower(iterator.next()) : null;
+ }
+ // next is not processed and is either null or past row vectors
+
+ Hop matrices = null;
+ while(next != null &&
+ (next.getDataType() ==
Expression.DataType.MATRIX))
+ {
+ if( matrices == null )
+ matrices = next;
+ else {
+ matrices =
HopRewriteUtils.createBinary(matrices, next, Hop.OpOp2.MULT);
+ matrices.setVisited();
+ }
+ next = iterator.hasNext() ?
constructPower(iterator.next()) : null;
}
- return first;
+ // next is not processed and is either null or past matrices
+
+ Hop other = null;
+ while(next != null)
+ {
+ if( other == null )
+ other = next;
+ else {
+ other = HopRewriteUtils.createBinary(other,
next, Hop.OpOp2.MULT);
+ other.setVisited();
+ }
+ next = iterator.hasNext() ?
constructPower(iterator.next()) : null;
+ }
+ // finished
+
+ // ((other * matrices) * rowVectors) * colVectorsScalars
+ Hop top = null;
+ if( other == null && matrices != null )
+ top = matrices;
+ else if( other != null && matrices == null )
+ top = other;
+ else if( other != null ) { //matrices != null
+ top = HopRewriteUtils.createBinary(other, matrices,
Hop.OpOp2.MULT);
+ top.setVisited();
+ }
+
+ if( top == null && rowVectors != null )
+ top = rowVectors;
+ else if( rowVectors != null ) { //top != null
+ top = HopRewriteUtils.createBinary(top, rowVectors,
Hop.OpOp2.MULT);
+ top.setVisited();
+ }
+
+ if( top == null && colVectorsScalars != null )
+ top = colVectorsScalars;
+ else if( colVectorsScalars != null ) { //top != null
+ top = HopRewriteUtils.createBinary(top,
colVectorsScalars, Hop.OpOp2.MULT);
+ top.setVisited();
+ }
+
+ return top;
}
private static Hop constructPower(final Map.Entry<Hop, Integer> entry) {
@@ -154,7 +238,7 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
hop.setVisited(); // we will visit the leaves' children next
if (cnt == 1)
return hop;
- Hop pow = HopRewriteUtils.createBinary(hop, new LiteralOp(cnt),
Hop.OpOp2.POW);
+ final Hop pow = HopRewriteUtils.createBinary(hop, new
LiteralOp(cnt), Hop.OpOp2.POW);
pow.setVisited();
return pow;
}
@@ -162,8 +246,8 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
/**
* A Comparator that orders Hops by their data type, dimention, and
sparsity.
* The order is as follows:
- * scalars > col vectors > row vectors >
- * non-vector matrices ordered by sparsity (higher nnz first,
unknown sparsity last) >
+ * scalars < col vectors < row vectors <
+ * non-vector matrices ordered by sparsity (higher nnz last,
unknown sparsity last) >
* other data types.
* Disambiguate by Hop ID.
*/
@@ -172,11 +256,11 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
{
for (int i = 0, valuesLength =
Expression.DataType.values().length; i < valuesLength; i++)
switch(Expression.DataType.values()[i]) {
- case SCALAR: orderDataType[i] = 4; break;
- case MATRIX: orderDataType[i] = 3; break;
+ case SCALAR: orderDataType[i] = 0; break;
+ case MATRIX: orderDataType[i] = 1; break;
case FRAME: orderDataType[i] = 2; break;
- case OBJECT: orderDataType[i] = 1; break;
- case UNKNOWN:orderDataType[i] = 0; break;
+ case OBJECT: orderDataType[i] = 3; break;
+ case UNKNOWN:orderDataType[i] = 4; break;
}
}
@@ -190,15 +274,15 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
case MATRIX:
// two matrices; check for vectors
if (o1.getDim2() == 1) { // col vector
- if (o2.getDim2() != 1) return
1; // col vectors are greatest of matrices
+ if (o2.getDim2() != 1) return
-1; // col vectors are greatest of matrices
return
compareBySparsityThenId(o1, o2); // both col vectors
} else if (o2.getDim2() == 1) { // 2 is col
vector; 1 is not
- return -1; // col vectors are
the greatest matrices
+ return 1; // col vectors are
the greatest matrices
} else if (o1.getDim1() == 1) { // row vector
- if (o2.getDim1() != 1) return
1; // row vectors greater than non-vectors
+ if (o2.getDim1() != 1) return
-1; // row vectors greater than non-vectors
return
compareBySparsityThenId(o1, o2); // both row vectors
} else if (o2.getDim1() == 1) { // 2 is row
vector; 1 is not
- return 1; // col vectors
greater than non-vectors
+ return 1; // row vectors
greater than non-vectors
} else { // both non-vectors
return
compareBySparsityThenId(o1, o2);
}
@@ -209,10 +293,10 @@ public class RewriteElementwiseMultChainOptimization
extends HopRewriteRule {
private int compareBySparsityThenId(final Hop o1, final Hop o2)
{
// the hop with more nnz is first; unknown nnz (-1) last
final int c = Long.compare(o1.getNnz(), o2.getNnz());
- if (c != 0) return c;
+ if (c != 0) return -c;
return Long.compare(o1.getHopID(), o2.getHopID());
}
- }.reversed();
+ };
/**
* Check if a node has a parent that is not in the set of emults.
Recursively check children who are also emults.
http://git-wip-us.apache.org/repos/asf/systemml/blob/999fdfbc/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteElementwiseMultChainOptimizationAllTest.java
----------------------------------------------------------------------
diff --git
a/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteElementwiseMultChainOptimizationAllTest.java
b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteElementwiseMultChainOptimizationAllTest.java
new file mode 100644
index 0000000..ba5c78d
--- /dev/null
+++
b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteElementwiseMultChainOptimizationAllTest.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.misc;
+
+import java.util.HashMap;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test rewriting `2*X*3*v*5*w*4*z*5*Y*2*v*2*X`, where `v` and `z` are row
vectors and `w` is a column vector,
+ * successfully rewrites to `Y*(X^2)*z*(v^2)*w*2400`.
+ */
+public class RewriteElementwiseMultChainOptimizationAllTest extends
AutomatedTestBase
+{
+ private static final String TEST_NAME1 = "RewriteEMultChainOpAll";
+ private static final String TEST_DIR = "functions/misc/";
+ private static final String TEST_CLASS_DIR = TEST_DIR +
RewriteElementwiseMultChainOptimizationAllTest.class.getSimpleName() + "/";
+
+ private static final int rows = 123;
+ private static final int cols = 321;
+ private static final double eps = Math.pow(10, -10);
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration( TEST_NAME1, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) );
+ }
+
+ @Test
+ public void testMatrixMultChainOptNoRewritesCP() {
+ testRewriteMatrixMultChainOp(TEST_NAME1, false, ExecType.CP);
+ }
+
+ @Test
+ public void testMatrixMultChainOptNoRewritesSP() {
+ testRewriteMatrixMultChainOp(TEST_NAME1, false, ExecType.SPARK);
+ }
+
+ @Test
+ public void testMatrixMultChainOptRewritesCP() {
+ testRewriteMatrixMultChainOp(TEST_NAME1, true, ExecType.CP);
+ }
+
+ @Test
+ public void testMatrixMultChainOptRewritesSP() {
+ testRewriteMatrixMultChainOp(TEST_NAME1, true, ExecType.SPARK);
+ }
+
+ private void testRewriteMatrixMultChainOp(String testname, boolean
rewrites, ExecType et)
+ {
+ RUNTIME_PLATFORM platformOld = rtplatform;
+ switch( et ){
+ case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+ case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+ default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
break;
+ }
+
+ boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+ if( rtplatform == RUNTIME_PLATFORM.SPARK )
+ DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+ boolean rewritesOld = OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES;
+ OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES = rewrites;
+
+ try
+ {
+ TestConfiguration config =
getTestConfiguration(testname);
+ loadTestConfiguration(config);
+
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + testname + ".dml";
+ programArgs = new String[] { "-explain", "hops",
"-stats", "-args", input("X"), input("Y"), input("v"), input("z"), input("w"),
output("R") };
+ fullRScriptName = HOME + testname + ".R";
+ rCmd = getRCmd(inputDir(), expectedDir());
+
+ double Xsparsity = 0.8, Ysparsity = 0.6;
+ double[][] X = getRandomMatrix(rows, cols, -1, 1,
Xsparsity, 7);
+ double[][] Y = getRandomMatrix(rows, cols, -1, 1,
Ysparsity, 3);
+ double[][] z = getRandomMatrix(1, cols, -1, 1,
Ysparsity, 5);
+ double[][] v = getRandomMatrix(1, cols, -1, 1,
Xsparsity, 4);
+ double[][] w = getRandomMatrix(rows, 1, -1, 1,
Ysparsity, 6);
+ writeInputMatrixWithMTD("X", X, true);
+ writeInputMatrixWithMTD("Y", Y, true);
+ writeInputMatrixWithMTD("z", z, true);
+ writeInputMatrixWithMTD("v", v, true);
+ writeInputMatrixWithMTD("w", w, true);
+
+ //execute tests
+ runTest(true, false, null, -1);
+ runRScript(true);
+
+ //compare matrices
+ HashMap<CellIndex, Double> dmlfile =
readDMLMatrixFromHDFS("R");
+ HashMap<CellIndex, Double> rfile =
readRMatrixFromFS("R");
+ TestUtils.compareMatrices(dmlfile, rfile, eps,
"Stat-DML", "Stat-R");
+
+ //check for presence of power operator, if we did a
rewrite
+ if( rewrites ) {
+
Assert.assertTrue(heavyHittersContainsSubString("^2"));
+ }
+ }
+ finally {
+ OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES = rewritesOld;
+ rtplatform = platformOld;
+ DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/systemml/blob/999fdfbc/src/test/scripts/functions/misc/RewriteEMultChainOpAll.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteEMultChainOpAll.R
b/src/test/scripts/functions/misc/RewriteEMultChainOpAll.R
new file mode 100644
index 0000000..20f76c2
--- /dev/null
+++ b/src/test/scripts/functions/misc/RewriteEMultChainOpAll.R
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args <- commandArgs(TRUE)
+# args[1]=""
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+v = as.matrix(readMM(paste(args[1], "v.mtx", sep="")))
+z = as.matrix(readMM(paste(args[1], "z.mtx", sep="")))
+w = as.matrix(readMM(paste(args[1], "w.mtx", sep="")))
+
+R = 2* X *3* X *5* Y *4*5*2*2* (matrix(1,length(w),1)%*%z) *
(matrix(1,length(w),1)%*%v)^2 * (w%*%matrix(1,1,length(v)))
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep=""));
http://git-wip-us.apache.org/repos/asf/systemml/blob/999fdfbc/src/test/scripts/functions/misc/RewriteEMultChainOpAll.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/misc/RewriteEMultChainOpAll.dml
b/src/test/scripts/functions/misc/RewriteEMultChainOpAll.dml
new file mode 100644
index 0000000..90f9242
--- /dev/null
+++ b/src/test/scripts/functions/misc/RewriteEMultChainOpAll.dml
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = read($1);
+Y = read($2);
+v = read($3);
+z = read($4);
+w = read($5);
+
+R = 2* X *3* v *5* w *4* z *5* Y *2* v *2* X
+
+write(R, $6);
\ No newline at end of file