Repository: systemml Updated Branches: refs/heads/master f6b8b74a2 -> cb58c7c16
[SYSTEMML-2119] Fix codegen cell over compressed matrices (seq-aware) This patch fixes the codegen cell aggregate operations over compressed matrices, which compute the output solely over distinct values and their counts for sparse-safe operations. We now guard this special case against operators that contain sequence operations because these operations require the specific row indexes, and thus individual value instances and their positions, to compute correct results. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cb58c7c1 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cb58c7c1 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cb58c7c1 Branch: refs/heads/master Commit: cb58c7c162a09531ad06cda62d347da5adea7359 Parents: f6b8b74 Author: Matthias Boehm <[email protected]> Authored: Sat May 12 13:59:56 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat May 12 13:59:56 2018 -0700 ---------------------------------------------------------------------- .../sysml/hops/codegen/cplan/CNodeCell.java | 12 +++- .../hops/codegen/template/TemplateCell.java | 11 ++++ .../sysml/runtime/codegen/SpoofCellwise.java | 13 ++-- .../codegen/CompressedCellwiseTest.java | 67 +++++++++++++++++++- .../codegen/CompressedCellwiseSumSeq.R | 28 ++++++++ .../codegen/CompressedCellwiseSumSeq.dml | 24 +++++++ 6 files changed, 147 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java index e97a00a..39946cc 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java @@ -39,7 +39,7 @@ public class CNodeCell extends CNodeTpl + "\n" + "public final class %TMP% extends SpoofCellwise {\n" + " public %TMP%() {\n" - + " super(CellType.%TYPE%, %SPARSE_SAFE%, %AGG_OP%);\n" + + " super(CellType.%TYPE%, %SPARSE_SAFE%, %SEQ%, %AGG_OP%);\n" + " }\n" + " protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, long grix, int rix, int cix) { \n" + "%BODY_dense%" @@ -50,6 +50,7 @@ public class CNodeCell extends CNodeTpl private CellType _type = null; private AggOp _aggOp = null; private boolean _sparseSafe = false; + private boolean _containsSeq = true; private boolean _requiresCastdtm = false; private boolean _multipleConsumers = false; @@ -91,6 +92,14 @@ public class CNodeCell extends CNodeTpl return _sparseSafe; } + public void setContainsSeq(boolean flag) { + _containsSeq = flag; + } + + public boolean containsSeq() { + return _containsSeq; + } + public void setRequiresCastDtm(boolean flag) { _requiresCastdtm = flag; _hash = 0; @@ -124,6 +133,7 @@ public class CNodeCell extends CNodeTpl tmp = tmp.replace("%TYPE%", getCellType().name()); tmp = tmp.replace("%AGG_OP%", (_aggOp!=null) ? "AggOp."+_aggOp.name() : "null" ); tmp = tmp.replace("%SPARSE_SAFE%", String.valueOf(isSparseSafe())); + tmp = tmp.replace("%SEQ%", String.valueOf(containsSeq())); return tmp; } http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java index 8eaffaf..0ce35c6 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java @@ -145,6 +145,7 @@ public class TemplateCell extends TemplateBase tpl.setAggOp(TemplateUtils.getAggOp(hop)); tpl.setSparseSafe(isSparseSafe(Arrays.asList(hop), sinHops[0], Arrays.asList(tpl.getOutput()), Arrays.asList(tpl.getAggOp()), false)); + tpl.setContainsSeq(rContainsSeq(tpl.getOutput(), new HashSet<>())); tpl.setRequiresCastDtm(hop instanceof AggBinaryOp); tpl.setBeginLine(hop.getBeginLine()); @@ -345,6 +346,16 @@ public class TemplateCell extends TemplateBase return ret; } + protected boolean rContainsSeq(CNode node, HashSet<Long> memo) { + if( memo.contains(node.getID()) ) + return false; + boolean ret = TemplateUtils.isBinary(node, BinType.SEQ_RIX); + for( CNode c : node.getInput() ) + ret |= rContainsSeq(c, memo); + memo.add(node.getID()); + return ret; + } + /** * Comparator to order input hops of the cell template. We try to order * matrices-vectors-scalars via sorting by number of cells and for http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 5ec18f5..7b225dd 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -73,11 +73,13 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private final CellType _type; private final AggOp _aggOp; private final boolean _sparseSafe; + private final boolean _containsSeq; - public SpoofCellwise(CellType type, boolean sparseSafe, AggOp aggOp) { + public SpoofCellwise(CellType type, boolean sparseSafe, boolean containsSeq, AggOp aggOp) { _type = type; _aggOp = aggOp; _sparseSafe = sparseSafe; + _containsSeq = containsSeq; } public CellType getCellType() { @@ -92,6 +94,10 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl return _sparseSafe; } + public boolean containsSeq() { + return _containsSeq; + } + @Override public String getSpoofType() { return "Cell" + getClass().getName().split("\\.")[1]; @@ -1051,15 +1057,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private double executeCompressedAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru, long rix) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { - //TODO handle sequences in special case summation KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); KahanObject kbuff2 = new KahanObject(0, 0); //special case: computation over value-tuples only - if( sparseSafe && b.length==0 && !a.hasUncompressedColGroup() ) { + if( sparseSafe && b.length==0 && !a.hasUncompressedColGroup() && !containsSeq()) { //note: all remaining groups are guaranteed ColGroupValue boolean entireGrp = (rl==0 && ru==a.getNumRows()); int maxNumVals = a.getColGroups().stream().mapToInt( http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java index 641584e..5530686 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java @@ -39,6 +39,7 @@ public class CompressedCellwiseTest extends AutomatedTestBase private static final String TEST_NAME1 = "CompressedCellwiseMain"; private static final String TEST_NAME2 = "CompressedCellwiseSide"; private static final String TEST_NAME3 = "CompressedCellwiseSumSq"; + private static final String TEST_NAME4 = "CompressedCellwiseSumSeq"; private static final String TEST_DIR = "functions/codegen/"; private static final String TEST_CLASS_DIR = TEST_DIR + CompressedCellwiseTest.class.getSimpleName() + "/"; @@ -71,6 +72,7 @@ public class CompressedCellwiseTest extends AutomatedTestBase addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) ); addTestConfiguration( TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "R" }) ); addTestConfiguration( TEST_NAME3, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3, new String[] { "R" }) ); + addTestConfiguration( TEST_NAME4, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME4, new String[] { "R" }) ); } @Test @@ -316,7 +318,66 @@ public class CompressedCellwiseTest extends AutomatedTestBase public void testCompressedCellwiseSumSqEmptyRand3CP() { testCompressedCellwise( TEST_NAME3, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.CP ); } + + @Test + public void testCompressedCellwiseSumSeqDenseConstCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.DENSE, ValueType.CONST, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqDenseRandCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.DENSE, ValueType.RAND, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqDenseRand2CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqDenseRand3CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqSparseConstCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.SPARSE, ValueType.CONST, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqSparseRandCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.SPARSE, ValueType.RAND, ExecType.CP ); + } + @Test + public void testCompressedCellwiseSumSeqSparseRand2CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqSparseRand3CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqEmptyConstCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.EMPTY, ValueType.CONST, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqEmptyRandCP() { + testCompressedCellwise( TEST_NAME4, SparsityType.EMPTY, ValueType.RAND, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqEmptyRand2CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.EMPTY, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseSumSeqEmptyRand3CP() { + testCompressedCellwise( TEST_NAME4, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } private void testCompressedCellwise(String testname, SparsityType stype, ValueType vtype, ExecType et) { @@ -340,11 +401,11 @@ public class CompressedCellwiseTest extends AutomatedTestBase String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + testname + ".dml"; - programArgs = new String[]{"-explain", "-stats", + programArgs = new String[]{"-explain", "-stats", "-args", input("X"), output("R") }; fullRScriptName = HOME + testname + ".R"; - rCmd = getRCmd(inputDir(), expectedDir()); + rCmd = getRCmd(inputDir(), expectedDir()); //generate input data double sparsity = -1; @@ -384,7 +445,7 @@ public class CompressedCellwiseTest extends AutomatedTestBase OptimizerUtils.ALLOW_OPERATOR_FUSION = true; CompressedMatrixBlock.ALLOW_DDC_ENCODING = true; } - } + } /** * Override default configuration with custom test configuration to ensure http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.R b/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.R new file mode 100644 index 0000000..d22accd --- /dev/null +++ b/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.R @@ -0,0 +1,28 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +library("Matrix") +library("matrixStats") + +X = readMM(paste(args[1], "X.mtx", sep="")); +R = as.matrix(sum(X*(seq(1,nrow(X))%*%matrix(1,1,ncol(X))))); +writeMM(as(R,"CsparseMatrix"), paste(args[2], "R", sep="")); http://git-wip-us.apache.org/repos/asf/systemml/blob/cb58c7c1/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.dml b/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.dml new file mode 100644 index 0000000..550f3e3 --- /dev/null +++ b/src/test/scripts/functions/codegen/CompressedCellwiseSumSeq.dml @@ -0,0 +1,24 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +X = read($1); +R = as.matrix(sum(X*seq(1,nrow(X)))); +write(R, $2);
