Repository: systemml Updated Branches: refs/heads/master f8dc5c18b -> 728b2c904
[SYSTEMML-2275] Fix incorrect expected nnz on reading matrices from HDFS This patch fixes a severe issue (introduced with SYSTEMML-2217, i.e., after our 1.1 release) of incorrect expected nnz on reading matrices from HDFS. In detail, the data converter API required an expected sparsity, which was incorrectly computed for (nnz x cols, rows) instead of (rows x cols, nnz). We now directly pass the nnz to the readers. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e8774a7a Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e8774a7a Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e8774a7a Branch: refs/heads/master Commit: e8774a7a272d1ba4060020b143de6b969e3daabf Parents: f8dc5c1 Author: Matthias Boehm <[email protected]> Authored: Sun Apr 22 15:45:29 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Apr 22 15:45:29 2018 -0700 ---------------------------------------------------------------------- .../controlprogram/caching/MatrixObject.java | 4 +--- .../apache/sysml/runtime/io/ReadProperties.java | 5 ++--- .../sysml/runtime/util/DataConverter.java | 20 +++++++------------- .../caching/CachingPWriteExportTest.java | 15 +++++---------- .../data/FullStringInitializeTest.java | 3 ++- 5 files changed, 17 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java index 678ddc0..ca704e0 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java +++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java @@ -431,10 +431,8 @@ public class MatrixObject extends CacheableData<MatrixBlock> } //read matrix and maintain meta data - double sparsity = (mc.getNonZeros() < 0) ? (iimd.getInputInfo().isTextIJV()?-1:1) : - OptimizerUtils.getSparsity(mc.getNonZeros(),mc.getRows(),mc.getCols()); MatrixBlock newData = DataConverter.readMatrixFromHDFS(fname, iimd.getInputInfo(), rlen, clen, - mc.getRowsPerBlock(), mc.getColsPerBlock(), sparsity, getFileFormatProperties()); + mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros(), getFileFormatProperties()); setHDFSFileExists(true); //sanity check correct output http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java b/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java index 7ada3e1..9ce11d9 100644 --- a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java +++ b/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java @@ -24,12 +24,11 @@ import org.apache.sysml.runtime.matrix.data.InputInfo; public class ReadProperties { - // Properties common to all file formats public String path; public long rlen, clen; public int brlen, bclen; - public double expectedSparsity; + public long expectedNnz; public InputInfo inputInfo; public boolean localFS; @@ -41,7 +40,7 @@ public class ReadProperties clen = -1; brlen = -1; bclen = -1; - expectedSparsity = 0.1d; + expectedNnz = -1; inputInfo = null; localFS = false; } http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index 0c834b1..7aa50d9 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -94,7 +94,6 @@ public class DataConverter prop.bclen = bclen; prop.localFS = localFS; - //expected matrix is sparse (default SystemML usecase) return readMatrixFromHDFS(prop); } @@ -110,11 +109,10 @@ public class DataConverter prop.brlen = brlen; prop.bclen = bclen; - //expected matrix is sparse (default SystemML usecase) return readMatrixFromHDFS(prop); } - public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen, int brlen, int bclen, double expectedSparsity) + public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen, int brlen, int bclen, long expectedNnz) throws IOException { ReadProperties prop = new ReadProperties(); @@ -125,13 +123,13 @@ public class DataConverter prop.clen = clen; prop.brlen = brlen; prop.bclen = bclen; - prop.expectedSparsity = expectedSparsity; + prop.expectedNnz = expectedNnz; return readMatrixFromHDFS(prop); } public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen, - int brlen, int bclen, double expectedSparsity, boolean localFS) + int brlen, int bclen, long expectedNnz, boolean localFS) throws IOException { ReadProperties prop = new ReadProperties(); @@ -142,14 +140,14 @@ public class DataConverter prop.clen = clen; prop.brlen = brlen; prop.bclen = bclen; - prop.expectedSparsity = expectedSparsity; + prop.expectedNnz = expectedNnz; prop.localFS = localFS; return readMatrixFromHDFS(prop); } public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo inputinfo, long rlen, long clen, - int brlen, int bclen, double expectedSparsity, FileFormatProperties formatProperties) + int brlen, int bclen, long expectedNnz, FileFormatProperties formatProperties) throws IOException { 
ReadProperties prop = new ReadProperties(); @@ -160,10 +158,9 @@ public class DataConverter prop.clen = clen; prop.brlen = brlen; prop.bclen = bclen; - prop.expectedSparsity = expectedSparsity; + prop.expectedNnz = expectedNnz; prop.formatProperties = formatProperties; - //prop.printMe(); return readMatrixFromHDFS(prop); } @@ -193,14 +190,11 @@ public class DataConverter { //Timing time = new Timing(true); - long estnnz = (prop.expectedSparsity <= 0 || prop.rlen <= 0 || prop.clen <= 0) ? - -1 : (long)(prop.expectedSparsity*prop.rlen*prop.clen); - //core matrix reading MatrixBlock ret = null; try { MatrixReader reader = MatrixReaderFactory.createMatrixReader(prop); - ret = reader.readMatrixFromHDFS(prop.path, prop.rlen, prop.clen, prop.brlen, prop.bclen, estnnz); + ret = reader.readMatrixFromHDFS(prop.path, prop.rlen, prop.clen, prop.brlen, prop.bclen, prop.expectedNnz); } catch(DMLRuntimeException rex) { http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java b/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java index 201d32a..8e45674 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java @@ -60,27 +60,21 @@ public class CachingPWriteExportTest extends AutomatedTestBase } - /** - * - * @param outer execution mode of outer parfor loop - * @param inner execution mode of inner parfor loop - * @param instType execution mode of instructions - */ private void runTestExport( String outputFormat ) - { + { TestConfiguration config = getTestConfiguration(TEST_NAME); config.addVariable("rows", rows); 
config.addVariable("cols", cols); loadTestConfiguration(config); - /* This is for running the junit test the new way, i.e., construct the arguments directly */ String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + TEST_NAME + ".dml"; programArgs = new String[]{"-args", input("V"), Integer.toString(rows), Integer.toString(cols), output("V"), outputFormat }; long seed = System.nanoTime(); - double[][] V = getRandomMatrix(rows, cols, 0, 1, sparsity, seed); + long nnz = (long)Math.round(sparsity * rows * cols); + double[][] V = getRandomMatrix(rows, cols, 0, 1, sparsity, seed); writeInputMatrix("V", V, true); //always text writeExpectedMatrix("V", V); @@ -96,7 +90,8 @@ public class CachingPWriteExportTest extends AutomatedTestBase else ii = InputInfo.TextCellInputInfo; - MatrixBlock mb = DataConverter.readMatrixFromHDFS(output("V"), ii, rows, cols, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, sparsity); + MatrixBlock mb = DataConverter.readMatrixFromHDFS(output("V"), + ii, rows, cols, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, nnz); Vp = DataConverter.convertToDoubleMatrix(mb); } catch(Exception ex) http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java b/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java index 78e4021..095b11f 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java @@ -324,6 +324,7 @@ public class FullStringInitializeTest extends AutomatedTestBase int cols = (intype==InputType.COL_VECTOR) ? 
1 : colsMatrix; int rows = (intype==InputType.ROW_VECTOR) ? 1 : rowsMatrix; double sparsity = (sparse) ? spSparse : spDense; + long nnz = (long)Math.round(sparsity * rows * cols); //generate data double[][] A = getRandomMatrix(rows, cols, -5, 5, sparsity, 7); @@ -362,7 +363,7 @@ public class FullStringInitializeTest extends AutomatedTestBase if( !expectExcept ) { //compare matrices MatrixBlock ret = DataConverter.readMatrixFromHDFS(output("A"), InputInfo.TextCellInputInfo, - rows, cols, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, sparsity, null); + rows, cols, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, nnz, null); double[][] dret = DataConverter.convertToDoubleMatrix(ret); TestUtils.compareMatrices(A, dret, rows, cols, eps); }
