This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 293fca19ce1a5d076a55968693e29540d3658b6f Author: baunsgaard <[email protected]> AuthorDate: Sun Nov 21 13:04:37 2021 +0100 [SYSTEMDS-3225] CLA spoof support This commit fixes spoof support for compressed inputs by decompressing them first; it does not add native execution on compressed data. Previously there were spoof CLA tests hidden in the codegen tests, but they did not verify whether the matrix was compressed. These tests are now moved to the compression tests, and the compression instruction is verified. Closes #1462 --- .../hops/rewrite/RewriteCompressedReblock.java | 10 +- .../sysds/runtime/codegen/SpoofCellwise.java | 16 ++- .../sysds/runtime/codegen/SpoofMultiAggregate.java | 19 ++- .../sysds/runtime/codegen/SpoofOperator.java | 11 +- .../sysds/runtime/codegen/SpoofOuterProduct.java | 6 +- .../apache/sysds/runtime/codegen/SpoofRowwise.java | 4 + .../codegen/SparseSideInputTest.java | 145 ++++++++++----------- 7 files changed, 117 insertions(+), 94 deletions(-) diff --git a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java index 96e1469..3c068ab 100644 --- a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java +++ b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java @@ -128,11 +128,13 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { public static boolean satisfiesSizeConstraintsForCompression(Hop hop) { if(hop.getDim2() >= 1) { + final long x = hop.getDim1(); + final long y = hop.getDim2(); return - // If number of rows is above 1000 and either very sparse or number of columns is less than 100. - (hop.getDim1() >= 1000 && (hop.getDim2() < 100) || hop.getSparsity() < 0.0001) - // If relative ratio between number of rows and columns is better than 75, aka 75 rows per one column. - || hop.getDim1() / hop.getDim2() >= 75; + // If the Cube of the number of rows is greater than multiplying the number of columns by 1024. 
+ y << 10 <= x * x + // is very sparse and at least 100 rows. + || (hop.getSparsity() < 0.0001 && y > 100); } return false; } diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysds/runtime/codegen/SpoofCellwise.java index a1d2b0f..63ef504 100644 --- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofCellwise.java @@ -28,6 +28,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -44,8 +45,8 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.CommonThreadPool; import org.apache.sysds.runtime.util.UtilFunctions; -public abstract class SpoofCellwise extends SpoofOperator -{ +public abstract class SpoofCellwise extends SpoofOperator { + private static final long serialVersionUID = 3442528770573293590L; // these values need to match with their native counterparts (spoof cuda ops) @@ -146,6 +147,9 @@ public abstract class SpoofCellwise extends SpoofOperator //input preparation MatrixBlock a = inputs.get(0); + if(a instanceof CompressedMatrixBlock) + a = CompressedMatrixBlock.getUncompressed(a); + SideInput[] b = prepInputMatrices(inputs); double[] scalars = prepInputScalars(scalarObjects); final int m = a.getNumRows(); @@ -160,11 +164,11 @@ public abstract class SpoofCellwise extends SpoofOperator if( inputSize < PAR_NUMCELL_THRESHOLD ) { k = 1; //serial execution } - + double ret = 0; if( k <= 1 ) //SINGLE-THREADED { - if( !inputs.get(0).isInSparseFormat() ) + if( !a.isInSparseFormat() ) ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix); else ret = 
executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix); @@ -226,6 +230,8 @@ public abstract class SpoofCellwise extends SpoofOperator //input preparation MatrixBlock a = inputs.get(0); + if(a instanceof CompressedMatrixBlock) + a = CompressedMatrixBlock.getUncompressed(a); SideInput[] b = prepInputMatrices(inputs); double[] scalars = prepInputScalars(scalarObjects); final int m = a.getNumRows(); @@ -855,7 +861,7 @@ public abstract class SpoofCellwise extends SpoofOperator { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); - + //note: sequential scan algorithm for both sparse-safe and -unsafe //in order to avoid binary search for sparse-unsafe for(int i=rl; i<ru; i++) { diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysds/runtime/codegen/SpoofMultiAggregate.java index e9dc779..3b6cf3c 100644 --- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofMultiAggregate.java +++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofMultiAggregate.java @@ -27,6 +27,7 @@ import java.util.concurrent.Future; import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.codegen.SpoofCellwise.AggOp; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -101,16 +102,20 @@ public abstract class SpoofMultiAggregate extends SpoofOperator //input preparation SideInput[] b = prepInputMatrices(inputs); double[] scalars = prepInputScalars(scalarObjects); - final int m = inputs.get(0).getNumRows(); - final int n = inputs.get(0).getNumColumns(); + MatrixBlock a = inputs.get(0); + final int m = a.getNumRows(); + final int n = a.getNumColumns(); boolean sparseSafe = isSparseSafe(); - + + if(a instanceof CompressedMatrixBlock) + a = 
CompressedMatrixBlock.getUncompressed(a); + if( k <= 1 ) //SINGLE-THREADED { - if( !inputs.get(0).isInSparseFormat() ) - executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); + if( !a.isInSparseFormat() ) + executeDense(a.getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); else - executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); + executeSparse(a.getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); } else //MULTI-THREADED { @@ -120,7 +125,7 @@ public abstract class SpoofMultiAggregate extends SpoofOperator int nk = UtilFunctions.roundToNext(Math.min(8*k,m/32), k); int blklen = (int)(Math.ceil((double)m/nk)); for( int i=0; i<nk & i*blklen<m; i++ ) - tasks.add(new ParAggTask(inputs.get(0), b, scalars, + tasks.add(new ParAggTask(a, b, scalars, m, n, sparseSafe, i*blklen, Math.min((i+1)*blklen, m))); //execute tasks List<Future<double[]>> taskret = pool.invokeAll(tasks); diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysds/runtime/codegen/SpoofOperator.java index 1ea229e..fe8d932 100644 --- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofOperator.java +++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofOperator.java @@ -26,6 +26,7 @@ import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.instructions.cp.ScalarObject; @@ -37,7 +38,7 @@ import org.apache.sysds.runtime.util.UtilFunctions; public abstract class SpoofOperator implements Serializable { private static final long serialVersionUID = 3834006998853573319L; - private static final Log LOG = LogFactory.getLog(SpoofOperator.class.getName()); + protected 
static final Log LOG = LogFactory.getLog(SpoofOperator.class.getName()); protected static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements protected static final long PAR_MINFLOP_THRESHOLD = 2L*1024*1024; //MIN 2 MFLOP @@ -83,9 +84,11 @@ public abstract class SpoofOperator implements Serializable for(int i=offset; i<offset+len; i++) { //transpose if necessary int clen = inputs.get(i).getNumColumns(); - MatrixBlock in = (tB1 && i==1 ) ? LibMatrixReorg.transpose(inputs.get(i), - new MatrixBlock(clen, inputs.get(i).getNumRows(), false)) : inputs.get(i); - + MatrixBlock inn = inputs.get(i); + if(inn instanceof CompressedMatrixBlock) + inn = CompressedMatrixBlock.getUncompressed(inn); + MatrixBlock in = (tB1 && i==1 ) ? LibMatrixReorg.transpose(inn, + new MatrixBlock(clen, inn.getNumRows(), false)) : inn; //create side input if( denseOnly && (in.isInSparseFormat() || !in.isAllocated()) ) { //convert empty or sparse to dense temporary block (note: we don't do diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysds/runtime/codegen/SpoofOuterProduct.java index 6430788..3621be7 100644 --- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofOuterProduct.java +++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofOuterProduct.java @@ -28,6 +28,7 @@ import java.util.concurrent.Future; import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.instructions.cp.DoubleObject; @@ -95,6 +96,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator MatrixBlock a = inputs.get(0); MatrixBlock out = new MatrixBlock(1, 1, false); out.allocateDenseBlock(); + + if(a instanceof CompressedMatrixBlock) + a = CompressedMatrixBlock.getUncompressed(a); if( !a.isInSparseFormat() 
) executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); @@ -474,7 +478,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator //NOTE: we don't create sparse side inputs w/ row-major cursors because //cache blocking would lead to non-sequential access - final int blocksizeIJ = (int) (8L*m*n/nnz); + final int blocksizeIJ = (int) (8L*m*n/Math.max(nnz,1)); int[] curk = new int[Math.min(blocksizeIJ, ru-rl)]; if( !out.isInSparseFormat() ) //DENSE diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java index f8983e1..fae7605 100644 --- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java @@ -28,6 +28,7 @@ import java.util.concurrent.Future; import java.util.stream.IntStream; import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.controlprogram.caching.MatrixObject; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.DenseBlockFactory; @@ -175,6 +176,9 @@ public abstract class SpoofRowwise extends SpoofOperator //core sequential execute MatrixBlock a = inputs.get(0); + if(a instanceof CompressedMatrixBlock) + a = CompressedMatrixBlock.getUncompressed(a); + if( !a.isInSparseFormat() ) executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m, rix); else diff --git a/src/test/java/org/apache/sysds/test/functions/codegen/SparseSideInputTest.java b/src/test/java/org/apache/sysds/test/functions/compress/codegen/SparseSideInputTest.java similarity index 63% rename from src/test/java/org/apache/sysds/test/functions/codegen/SparseSideInputTest.java rename to src/test/java/org/apache/sysds/test/functions/compress/codegen/SparseSideInputTest.java index 62d74df..7ae7a68 100644 --- 
a/src/test/java/org/apache/sysds/test/functions/codegen/SparseSideInputTest.java +++ b/src/test/java/org/apache/sysds/test/functions/compress/codegen/SparseSideInputTest.java @@ -17,7 +17,9 @@ * under the License. */ -package org.apache.sysds.test.functions.codegen; +package org.apache.sysds.test.functions.compress.codegen; + +import static org.junit.Assert.assertTrue; import java.io.File; import java.util.HashMap; @@ -25,161 +27,159 @@ import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysds.common.Types.ExecMode; -import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.common.Types.ExecType; +import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex; import org.apache.sysds.test.AutomatedTestBase; import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; -import org.junit.Assert; import org.junit.Test; -public class SparseSideInputTest extends AutomatedTestBase -{ +public class SparseSideInputTest extends AutomatedTestBase { private static final Log LOG = LogFactory.getLog(SparseSideInputTest.class.getName()); private static final String TEST_NAME = "SparseSideInput"; - private static final String TEST_NAME1 = TEST_NAME+"1"; //row sum(X/rowSums(X)+Y) - private static final String TEST_NAME2 = TEST_NAME+"2"; //cell sum(abs(X^2)+Y) - private static final String TEST_NAME3 = TEST_NAME+"3"; //magg sum(X^2), sum(X+Y) - private static final String TEST_NAME4 = TEST_NAME+"4"; //outer sum((X!=0) * (U %*% t(V) - Y)) - + private static final String TEST_NAME1 = TEST_NAME + "1"; // row sum(X/rowSums(X)+Y) + private static final String TEST_NAME2 = TEST_NAME + "2"; // cell sum(abs(X^2)+Y) + private static final String TEST_NAME3 = TEST_NAME + "3"; // magg sum(X^2), sum(X+Y) + private static final String TEST_NAME4 = TEST_NAME + "4"; // outer sum((X!=0) * (U %*% t(V) - Y)) + private static final String 
TEST_DIR = "functions/codegen/"; private static final String TEST_CLASS_DIR = TEST_DIR + SparseSideInputTest.class.getSimpleName() + "/"; private static String TEST_CONF1 = "SystemDS-config-codegen.xml"; private static String TEST_CONF2 = "SystemDS-config-codegen-compress.xml"; private static String TEST_CONF = TEST_CONF1; - + private static final int rows = 1798; private static final int cols = 784; private static final double sparsity = 0.1; private static final double eps = Math.pow(10, -7); - + @Override public void setUp() { TestUtils.clearAssertionInformation(); - for(int i=1; i<=4; i++) - addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) ); + for(int i = 1; i <= 4; i++) + addTestConfiguration(TEST_NAME + i, + new TestConfiguration(TEST_CLASS_DIR, TEST_NAME + i, new String[] {String.valueOf(i)})); } - + @Test public void testCodegenRowULASparseSideInputCP() { - testCodegenIntegration( TEST_NAME1, false, ExecType.CP ); + testCodegenIntegration(TEST_NAME1, false, ExecType.CP); } - + @Test public void testCodegenRowCLASparseSideInputCP() { - testCodegenIntegration( TEST_NAME1, true, ExecType.CP ); + testCodegenIntegration(TEST_NAME1, true, ExecType.CP); } - + @Test public void testCodegenRowULASparseSideInputSP() { - testCodegenIntegration( TEST_NAME1, false, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME1, false, ExecType.SPARK); } - + @Test public void testCodegenRowCLASparseSideInputSP() { - testCodegenIntegration( TEST_NAME1, true, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME1, true, ExecType.SPARK); } - + @Test public void testCodegenCellULASparseSideInputCP() { - testCodegenIntegration( TEST_NAME2, false, ExecType.CP ); + testCodegenIntegration(TEST_NAME2, false, ExecType.CP); } - + @Test public void testCodegenCellCLASparseSideInputCP() { - testCodegenIntegration( TEST_NAME2, true, ExecType.CP ); + testCodegenIntegration(TEST_NAME2, true, ExecType.CP); } - + @Test public 
void testCodegenCellULASparseSideInputSP() { - testCodegenIntegration( TEST_NAME2, false, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME2, false, ExecType.SPARK); } - + @Test public void testCodegenCellCLASparseSideInputSP() { - testCodegenIntegration( TEST_NAME2, true, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME2, true, ExecType.SPARK); } - + @Test public void testCodegenMaggULASparseSideInputCP() { - testCodegenIntegration( TEST_NAME3, false, ExecType.CP ); + testCodegenIntegration(TEST_NAME3, false, ExecType.CP); } - + @Test public void testCodegenMaggCLASparseSideInputCP() { - testCodegenIntegration( TEST_NAME3, true, ExecType.CP ); + testCodegenIntegration(TEST_NAME3, true, ExecType.CP); } - + @Test public void testCodegenMaggULASparseSideInputSP() { - testCodegenIntegration( TEST_NAME3, false, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME3, false, ExecType.SPARK); } - + @Test public void testCodegenMaggCLASparseSideInputSP() { - testCodegenIntegration( TEST_NAME3, true, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME3, true, ExecType.SPARK); } - + @Test public void testCodegenOuterULASparseSideInputCP() { - testCodegenIntegration( TEST_NAME4, false, ExecType.CP ); + testCodegenIntegration(TEST_NAME4, false, ExecType.CP); } - + @Test public void testCodegenOuterCLASparseSideInputCP() { - testCodegenIntegration( TEST_NAME4, true, ExecType.CP ); + testCodegenIntegration(TEST_NAME4, true, ExecType.CP); } - + @Test public void testCodegenOuterULASparseSideInputSP() { - testCodegenIntegration( TEST_NAME4, false, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME4, false, ExecType.SPARK); } - + @Test public void testCodegenOuterCLASparseSideInputSP() { - testCodegenIntegration( TEST_NAME4, true, ExecType.SPARK ); + testCodegenIntegration(TEST_NAME4, true, ExecType.SPARK); } - - private void testCodegenIntegration( String testname, boolean compress, ExecType instType ) - { + + private void testCodegenIntegration(String testname, boolean 
compress, ExecType instType) { boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; ExecMode platformOld = setExecMode(instType); - - try - { + + try { TEST_CONF = compress ? TEST_CONF2 : TEST_CONF1; OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = false; - + TestConfiguration config = getTestConfiguration(testname); loadTestConfiguration(config); - + String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + testname + ".dml"; - programArgs = new String[]{"-stats","-explain", "-args", - input("X"), input("Y"), output("R") }; - + programArgs = new String[] {"-stats", "-explain", "-args", input("X"), input("Y"), output("R")}; + fullRScriptName = HOME + testname + ".R"; rCmd = getRCmd(inputDir(), expectedDir()); - - //generate inputs + + // generate inputs double[][] X = getRandomMatrix(rows, cols, 0, 1, sparsity, 7); double[][] Y = getRandomMatrix(rows, cols, 0, 1, sparsity, 3); writeInputMatrixWithMTD("X", X, true); writeInputMatrixWithMTD("Y", Y, true); - - //run dml and r scripts - LOG.debug(fullDMLScriptName); - LOG.debug(runTest(true, false, null, -1)); - runRScript(true); - - //compare matrices - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); - HashMap<CellIndex, Double> rfile = readRMatrixFromExpectedDir("R"); - TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); - Assert.assertTrue(heavyHittersContainsSubString("spoof") - || heavyHittersContainsSubString("sp_spoof")); + + // run dml and r scripts + String ret = runTest(null).toString(); + runRScript(true); + + LOG.debug(ret); + + // compare matrices + HashMap<CellIndex, Double> dmlResult = readDMLMatrixFromOutputDir("R"); + HashMap<CellIndex, Double> rResult = readRMatrixFromExpectedDir("R"); + TestUtils.compareMatrices(dmlResult, rResult, eps, "Stat-DML", "Stat-R"); + assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof")); + if(compress) + assertTrue(heavyHittersContainsSubString("compress") || 
heavyHittersContainsSubString("sp_compress")); } finally { resetExecMode(platformOld); @@ -188,12 +188,11 @@ public class SparseSideInputTest extends AutomatedTestBase OptimizerUtils.ALLOW_OPERATOR_FUSION = true; } } - + @Override protected File getConfigTemplateFile() { // Instrumentation in this test's output log to show custom configuration file used for template. File f = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); - LOG.info("This test case overrides default configuration with " + f.getPath()); return f; } }
