Repository: systemml
Updated Branches:
  refs/heads/master f8dc5c18b -> 728b2c904


[SYSTEMML-2275] Fix incorrect expected nnz on reading matrices from HDFS

This patch fixes a severe issue (introduced with SYSTEMML-2217, i.e.,
after our 1.1 release) of incorrect expected nnz on reading matrices
from HDFS. In detail, the data converter API required an expected
sparsity, which was incorrectly computed for (nnz x cols, rows) instead
of (rows x cols, nnz). We now directly pass the nnz to the readers.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e8774a7a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e8774a7a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e8774a7a

Branch: refs/heads/master
Commit: e8774a7a272d1ba4060020b143de6b969e3daabf
Parents: f8dc5c1
Author: Matthias Boehm <[email protected]>
Authored: Sun Apr 22 15:45:29 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sun Apr 22 15:45:29 2018 -0700

----------------------------------------------------------------------
 .../controlprogram/caching/MatrixObject.java    |  4 +---
 .../apache/sysml/runtime/io/ReadProperties.java |  5 ++---
 .../sysml/runtime/util/DataConverter.java       | 20 +++++++-------------
 .../caching/CachingPWriteExportTest.java        | 15 +++++----------
 .../data/FullStringInitializeTest.java          |  3 ++-
 5 files changed, 17 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
index 678ddc0..ca704e0 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
@@ -431,10 +431,8 @@ public class MatrixObject extends 
CacheableData<MatrixBlock>
                }
                
                //read matrix and maintain meta data
-               double sparsity = (mc.getNonZeros() < 0) ? 
(iimd.getInputInfo().isTextIJV()?-1:1) :
-                       
OptimizerUtils.getSparsity(mc.getNonZeros(),mc.getRows(),mc.getCols());
                MatrixBlock newData = DataConverter.readMatrixFromHDFS(fname, 
iimd.getInputInfo(), rlen, clen,
-                               mc.getRowsPerBlock(), mc.getColsPerBlock(), 
sparsity, getFileFormatProperties());
+                               mc.getRowsPerBlock(), mc.getColsPerBlock(), 
mc.getNonZeros(), getFileFormatProperties());
                setHDFSFileExists(true);
                
                //sanity check correct output

http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java 
b/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java
index 7ada3e1..9ce11d9 100644
--- a/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java
+++ b/src/main/java/org/apache/sysml/runtime/io/ReadProperties.java
@@ -24,12 +24,11 @@ import org.apache.sysml.runtime.matrix.data.InputInfo;
 
 public class ReadProperties 
 {
-
        // Properties common to all file formats 
        public String path;
        public long rlen, clen;
        public int brlen, bclen;
-       public double expectedSparsity;
+       public long expectedNnz;
        public InputInfo inputInfo;
        public boolean localFS;
        
@@ -41,7 +40,7 @@ public class ReadProperties
                clen = -1;
                brlen = -1;
                bclen = -1;
-               expectedSparsity = 0.1d;
+               expectedNnz = -1;
                inputInfo = null;
                localFS = false;
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index 0c834b1..7aa50d9 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -94,7 +94,6 @@ public class DataConverter
                prop.bclen = bclen;
                prop.localFS = localFS;
                
-               //expected matrix is sparse (default SystemML usecase)
                return readMatrixFromHDFS(prop);
        }
 
@@ -110,11 +109,10 @@ public class DataConverter
                prop.brlen = brlen;
                prop.bclen = bclen;
                
-               //expected matrix is sparse (default SystemML usecase)
                return readMatrixFromHDFS(prop);
        }
 
-       public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo 
inputinfo, long rlen, long clen, int brlen, int bclen, double expectedSparsity) 
+       public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo 
inputinfo, long rlen, long clen, int brlen, int bclen, long expectedNnz) 
                throws IOException
        {
                ReadProperties prop = new ReadProperties();
@@ -125,13 +123,13 @@ public class DataConverter
                prop.clen = clen;
                prop.brlen = brlen;
                prop.bclen = bclen;
-               prop.expectedSparsity = expectedSparsity;
+               prop.expectedNnz = expectedNnz;
                
                return readMatrixFromHDFS(prop);
        }
 
        public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo 
inputinfo, long rlen, long clen, 
-                       int brlen, int bclen, double expectedSparsity, boolean 
localFS) 
+                       int brlen, int bclen, long expectedNnz, boolean 
localFS) 
                throws IOException
        {
                ReadProperties prop = new ReadProperties();
@@ -142,14 +140,14 @@ public class DataConverter
                prop.clen = clen;
                prop.brlen = brlen;
                prop.bclen = bclen;
-               prop.expectedSparsity = expectedSparsity;
+               prop.expectedNnz = expectedNnz;
                prop.localFS = localFS;
                
                return readMatrixFromHDFS(prop);
        }
 
        public static MatrixBlock readMatrixFromHDFS(String dir, InputInfo 
inputinfo, long rlen, long clen, 
-                       int brlen, int bclen, double expectedSparsity, 
FileFormatProperties formatProperties) 
+                       int brlen, int bclen, long expectedNnz, 
FileFormatProperties formatProperties) 
        throws IOException
        {
                ReadProperties prop = new ReadProperties();
@@ -160,10 +158,9 @@ public class DataConverter
                prop.clen = clen;
                prop.brlen = brlen;
                prop.bclen = bclen;
-               prop.expectedSparsity = expectedSparsity;
+               prop.expectedNnz = expectedNnz;
                prop.formatProperties = formatProperties;
                
-               //prop.printMe();
                return readMatrixFromHDFS(prop);
        }
        
@@ -193,14 +190,11 @@ public class DataConverter
        {       
                //Timing time = new Timing(true);
                
-               long estnnz = (prop.expectedSparsity <= 0 || prop.rlen <= 0 || 
prop.clen <= 0) ? 
-                       -1 : (long)(prop.expectedSparsity*prop.rlen*prop.clen);
-       
                //core matrix reading 
                MatrixBlock ret = null;
                try {
                        MatrixReader reader = 
MatrixReaderFactory.createMatrixReader(prop);
-                       ret = reader.readMatrixFromHDFS(prop.path, prop.rlen, 
prop.clen, prop.brlen, prop.bclen, estnnz);
+                       ret = reader.readMatrixFromHDFS(prop.path, prop.rlen, 
prop.clen, prop.brlen, prop.bclen, prop.expectedNnz);
                }
                catch(DMLRuntimeException rex)
                {

http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java
index 201d32a..8e45674 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/caching/CachingPWriteExportTest.java
@@ -60,27 +60,21 @@ public class CachingPWriteExportTest extends 
AutomatedTestBase
        }
        
        
-       /**
-        * 
-        * @param outer execution mode of outer parfor loop
-        * @param inner execution mode of inner parfor loop
-        * @param instType execution mode of instructions
-        */
        private void runTestExport( String outputFormat )
-       {                               
+       {
                TestConfiguration config = getTestConfiguration(TEST_NAME);
                config.addVariable("rows", rows);
                config.addVariable("cols", cols);
                loadTestConfiguration(config);
                
-               /* This is for running the junit test the new way, i.e., 
construct the arguments directly */
                String HOME = SCRIPT_DIR + TEST_DIR;
                fullDMLScriptName = HOME + TEST_NAME + ".dml";
                programArgs = new String[]{"-args", input("V"),
                        Integer.toString(rows), Integer.toString(cols), 
output("V"), outputFormat };
 
                long seed = System.nanoTime();
-        double[][] V = getRandomMatrix(rows, cols, 0, 1, sparsity, seed);
+               long nnz = (long)Math.round(sparsity * rows * cols);
+               double[][] V = getRandomMatrix(rows, cols, 0, 1, sparsity, 
seed);
                writeInputMatrix("V", V, true); //always text
                writeExpectedMatrix("V", V);
                
@@ -96,7 +90,8 @@ public class CachingPWriteExportTest extends AutomatedTestBase
                        else
                                ii = InputInfo.TextCellInputInfo;
                        
-                       MatrixBlock mb = 
DataConverter.readMatrixFromHDFS(output("V"), ii, rows, cols, 
OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, sparsity);
+                       MatrixBlock mb = 
DataConverter.readMatrixFromHDFS(output("V"),
+                               ii, rows, cols, 
OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, nnz);
                        Vp = DataConverter.convertToDoubleMatrix(mb);
                }
                catch(Exception ex)

http://git-wip-us.apache.org/repos/asf/systemml/blob/e8774a7a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java
index 78e4021..095b11f 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/data/FullStringInitializeTest.java
@@ -324,6 +324,7 @@ public class FullStringInitializeTest extends 
AutomatedTestBase
                        int cols = (intype==InputType.COL_VECTOR) ? 1 : 
colsMatrix;
                        int rows = (intype==InputType.ROW_VECTOR) ? 1 : 
rowsMatrix;
                        double sparsity = (sparse) ? spSparse : spDense;
+                       long nnz = (long)Math.round(sparsity * rows * cols);
                        
                        //generate data
                        double[][] A = getRandomMatrix(rows, cols, -5, 5, 
sparsity, 7); 
@@ -362,7 +363,7 @@ public class FullStringInitializeTest extends 
AutomatedTestBase
                        if( !expectExcept ) {
                                //compare matrices 
                                MatrixBlock ret = 
DataConverter.readMatrixFromHDFS(output("A"), InputInfo.TextCellInputInfo,
-                                               rows, cols, 
OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, sparsity, 
null);
+                                       rows, cols, 
OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, nnz, null);
                                double[][] dret = 
DataConverter.convertToDoubleMatrix(ret);
                                TestUtils.compareMatrices(A, dret, rows, cols, 
eps);
                        }

Reply via email to