This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new 84ef713  [SYSTEMDS-52] Fix libsvm reader/writer integration and 
correctness
84ef713 is described below

commit 84ef71326c6781bad4ed9b39a210ee2cd4a6d4bd
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Apr 11 23:12:37 2020 +0200

    [SYSTEMDS-52] Fix libsvm reader/writer integration and correctness
    
    This patch fixes a correctness issue of the libsvm local writers, which
    incorrectly shifted the output indexes twice for space inputs.
    Furthermore, the libsvm local readers were not fully integrated in all
    code path yet.
    
    The distributed libsvm readers/writers still remain to be integrated.
---
 docs/Tasks.txt                                     |  4 +-
 .../sysds/runtime/io/MatrixReaderFactory.java      | 69 ++++++++++------------
 .../sysds/runtime/io/ReaderTextLIBSVMParallel.java |  2 +-
 .../apache/sysds/runtime/io/WriterTextLIBSVM.java  | 12 ++--
 .../test/functions/data/misc/NoRenameTest.java     | 48 +++++++--------
 src/test/scripts/functions/data/NoRenameTest1.dml  |  2 +-
 6 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 7bb0c10..cfcab1a 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -47,8 +47,8 @@ SYSTEMDS-40 Preprocessing builtins
 
 SYSTEMDS-50 I/O Formats
  * 51 Support for homogeneous JSON (local/distributed)
- * 52 Support for libsvm files (local/distributed) 
- * 53 New sql data source (local, distributed)
+ * 52 Support for libsvm files (local/distributed)
+ * 53 New sql data source (local, distributed)                        
  * 54 Support for is.na, is.nan, is.infinite                          OK
 
 SYSTEMDS-60 Update SystemML improvements
diff --git a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java 
b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
index 3d2af34..168a336 100644
--- a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
@@ -28,36 +28,34 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
 public class MatrixReaderFactory 
 {
-
-       public static MatrixReader createMatrixReader( InputInfo iinfo ) 
+       public static MatrixReader createMatrixReader(InputInfo iinfo)
        {
                MatrixReader reader = null;
+               boolean par = 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS);
+               boolean mcsr = MatrixBlock.DEFAULT_SPARSEBLOCK == 
SparseBlock.Type.MCSR;
                
-               if( iinfo == InputInfo.TextCellInputInfo || iinfo == 
InputInfo.MatrixMarketInputInfo )
-               {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderTextCellParallel( iinfo );
-                       else
-                               reader = new ReaderTextCell( iinfo );
+               if( iinfo == InputInfo.TextCellInputInfo || iinfo == 
InputInfo.MatrixMarketInputInfo ) {
+                       reader = (par & mcsr) ? 
+                               new ReaderTextCellParallel(iinfo) : new 
ReaderTextCell(iinfo);
                }
-               else if( iinfo == InputInfo.CSVInputInfo )
-               {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderTextCSVParallel(new 
FileFormatPropertiesCSV());
-                       else
-                               reader = new ReaderTextCSV(new 
FileFormatPropertiesCSV());
+               else if( iinfo == InputInfo.CSVInputInfo ) {
+                       reader = (par & mcsr) ? 
+                               new ReaderTextCSVParallel(new 
FileFormatPropertiesCSV()) :
+                               new ReaderTextCSV(new 
FileFormatPropertiesCSV());
+               }
+               else if( iinfo == InputInfo.LIBSVMInputInfo) {
+                       reader = (par & mcsr) ? 
+                               new ReaderTextLIBSVMParallel() : new 
ReaderTextLIBSVM();
                }
                else if( iinfo == InputInfo.BinaryCellInputInfo ) 
                        reader = new ReaderBinaryCell();
                else if( iinfo == InputInfo.BinaryBlockInputInfo ) {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderBinaryBlockParallel( false );
-                       else
-                               reader = new ReaderBinaryBlock( false );
+                       reader = (par & mcsr) ? 
+                               new ReaderBinaryBlockParallel(false) : new 
ReaderBinaryBlock(false);
                }
                else {
                        throw new DMLRuntimeException("Failed to create matrix 
reader for unknown input info: "
-                                                  + 
InputInfo.inputInfoToString(iinfo));
+                               + InputInfo.inputInfoToString(iinfo));
                }
                
                return reader;
@@ -71,36 +69,33 @@ public class MatrixReaderFactory
                
                MatrixReader reader = null;
                InputInfo iinfo = props.inputInfo;
-
+               boolean par = 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS);
+               boolean mcsr = MatrixBlock.DEFAULT_SPARSEBLOCK == 
SparseBlock.Type.MCSR;
+               
                if( iinfo == InputInfo.TextCellInputInfo || iinfo == 
InputInfo.MatrixMarketInputInfo ) {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderTextCellParallel( iinfo );
-                       else
-                               reader = new ReaderTextCell( iinfo );
+                       reader = (par & mcsr) ? 
+                               new ReaderTextCellParallel(iinfo) : new 
ReaderTextCell(iinfo);
                }
                else if( iinfo == InputInfo.CSVInputInfo ) {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderTextCSVParallel( 
props.formatProperties!=null ? (FileFormatPropertiesCSV)props.formatProperties 
: new FileFormatPropertiesCSV());
-                       else
-                               reader = new ReaderTextCSV( 
props.formatProperties!=null ? (FileFormatPropertiesCSV)props.formatProperties 
: new FileFormatPropertiesCSV());
+                       reader = (par & mcsr) ?
+                               new ReaderTextCSVParallel( 
props.formatProperties!=null ?
+                                       
(FileFormatPropertiesCSV)props.formatProperties : new 
FileFormatPropertiesCSV()) :
+                               new ReaderTextCSV( props.formatProperties!=null 
? 
+                                       
(FileFormatPropertiesCSV)props.formatProperties : new 
FileFormatPropertiesCSV());
                }
                else if( iinfo == InputInfo.LIBSVMInputInfo) {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderTextLIBSVMParallel();
-                       else
-                               reader = new ReaderTextLIBSVM();
+                       reader = (par & mcsr) ? 
+                               new ReaderTextLIBSVMParallel() : new 
ReaderTextLIBSVM();
                }
                else if( iinfo == InputInfo.BinaryCellInputInfo ) 
                        reader = new ReaderBinaryCell();
                else if( iinfo == InputInfo.BinaryBlockInputInfo ) {
-                       if( 
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
 && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
-                               reader = new ReaderBinaryBlockParallel( 
props.localFS );
-                       else
-                               reader = new ReaderBinaryBlock( props.localFS );
+                       reader = (par & mcsr) ? 
+                               new ReaderBinaryBlockParallel(props.localFS) : 
new ReaderBinaryBlock(props.localFS);
                }
                else {
                        throw new DMLRuntimeException("Failed to create matrix 
reader for unknown input info: "
-                                                  + 
InputInfo.inputInfoToString(iinfo));
+                               + InputInfo.inputInfoToString(iinfo));
                }
                
                return reader;
diff --git 
a/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java 
b/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
index c465c07..76fff10 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
@@ -297,7 +297,7 @@ public class ReaderTextLIBSVMParallel extends MatrixReader
                                long rlen, long clen, int splitCount) 
                {
                        _split = split;
-                       _splitoffsets = offsets; // new 
SplitOffsetInfos(offsets);
+                       _splitoffsets = offsets;
                        _informat = informat;
                        _job = job;
                        _dest = dest;
diff --git a/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java 
b/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
index 71521fd..f747b65 100644
--- a/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
+++ b/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
@@ -38,7 +38,6 @@ public class WriterTextLIBSVM extends MatrixWriter
        
        }
 
-       @SuppressWarnings("resource")
        @Override
        public final void writeMatrixToHDFS(MatrixBlock src, String fname, long 
rlen, long clen, int blen, long nnz, boolean diag) 
                throws IOException, DMLRuntimeException 
@@ -99,7 +98,6 @@ public class WriterTextLIBSVM extends MatrixWriter
                                        double label = (sblock!=null) ? 
                                                sblock.get(i, clen-1) : 0;
                                        sb.append(label);
-                                       sb.append(IOUtilFunctions.LIBSVM_DELIM);
                                        
                                        if( sblock!=null && i<sblock.numRows() 
&& !sblock.isEmpty(i) ) {
                                                int pos = sblock.pos(i);
@@ -108,9 +106,10 @@ public class WriterTextLIBSVM extends MatrixWriter
                                                double[] avals = 
sblock.values(i);
                                                // append sparse row
                                                for( int k=pos; k<pos+alen; k++ 
) {
-                                                       if( aix[k] != clen-1 )
-                                                               
appendIndexValLibsvm(sb, aix[k]+1, avals[k]);
-                                                       
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+                                                       if( aix[k]!=clen-1 ) {
+                                                               
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+                                                               
appendIndexValLibsvm(sb, aix[k], avals[k]);
+                                                       }
                                                }
                                        }
                                        // write the string row
@@ -125,14 +124,13 @@ public class WriterTextLIBSVM extends MatrixWriter
                                        // append the class label as the 1st 
column
                                        double label = 
src.getValueDenseUnsafe(i, clen-1);
                                        sb.append(label);
-                                       sb.append(IOUtilFunctions.LIBSVM_DELIM);
                                        
                                        // append dense row
                                        for( int j=0; j<clen-1; j++ ) {
                                                double val = 
src.getValueDenseUnsafe(i, j);
                                                if( val != 0 ) {
-                                                       
appendIndexValLibsvm(sb, j, val);
                                                        
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+                                                       
appendIndexValLibsvm(sb, j, val);
                                                }
                                        }
                                        // write the string row
diff --git 
a/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java 
b/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
index 619a325..d78041a 100644
--- a/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
@@ -87,17 +87,17 @@ public class NoRenameTest extends AutomatedTestBase
        public void testTextmmSparseSinglenode() {
                runRenameTest("mm", true, ExecMode.SINGLE_NODE);
        }
-//     
-//     @Test
-//     public void testTextlibsvmDenseSinglenode() {
-//             runRenameTest("libsvm", false, ExecMode.SINGLE_NODE);
-//     }
-//     
-//     @Test
-//     public void testTextlibsvmSparseSinglenode() {
-//             runRenameTest("libsvm", true, ExecMode.SINGLE_NODE);
-//     }
-//     
+       
+       @Test
+       public void testTextlibsvmDenseSinglenode() {
+               runRenameTest("libsvm", false, ExecMode.SINGLE_NODE);
+       }
+       
+       @Test
+       public void testTextlibsvmSparseSinglenode() {
+               runRenameTest("libsvm", true, ExecMode.SINGLE_NODE);
+       }
+       
        @Test
        public void testBinaryDenseSinglenode() {
                runRenameTest("binary", false, ExecMode.SINGLE_NODE);
@@ -137,16 +137,16 @@ public class NoRenameTest extends AutomatedTestBase
        public void testTextmmSparseHybrid() {
                runRenameTest("mm", true, ExecMode.HYBRID);
        }
-//     
-//     @Test
-//     public void testTextlibsvmDenseHybrid() {
-//             runRenameTest("libsvm", false, ExecMode.HYBRID);
-//     }
-//     
-//     @Test
-//     public void testTextlibsvmSparseHybrid() {
-//             runRenameTest("libsvm", true, ExecMode.HYBRID);
-//     }
+       
+       @Test
+       public void testTextlibsvmDenseHybrid() {
+               runRenameTest("libsvm", false, ExecMode.HYBRID);
+       }
+       
+       @Test
+       public void testTextlibsvmSparseHybrid() {
+               runRenameTest("libsvm", true, ExecMode.HYBRID);
+       }
        
        @Test
        public void testBinaryDenseHybrid() {
@@ -187,7 +187,7 @@ public class NoRenameTest extends AutomatedTestBase
        public void testTextmmSparseSpark() {
                runRenameTest("mm", true, ExecMode.SPARK);
        }
-//     
+       
 //     @Test
 //     public void testTextlibsvmDenseSpark() {
 //             runRenameTest("libsvm", false, ExecMode.SPARK);
@@ -197,7 +197,7 @@ public class NoRenameTest extends AutomatedTestBase
 //     public void testTextlibsvmSparseSpark() {
 //             runRenameTest("libsvm", true, ExecMode.SPARK);
 //     }
-       
+//     
        @Test
        public void testBinaryDenseSpark() {
                runRenameTest("binary", false, ExecMode.SPARK);
@@ -251,4 +251,4 @@ public class NoRenameTest extends AutomatedTestBase
                        rtplatform = platformOld;
                }
        }
-}
\ No newline at end of file
+}
diff --git a/src/test/scripts/functions/data/NoRenameTest1.dml 
b/src/test/scripts/functions/data/NoRenameTest1.dml
index 8a8956d..e6d4a8d 100644
--- a/src/test/scripts/functions/data/NoRenameTest1.dml
+++ b/src/test/scripts/functions/data/NoRenameTest1.dml
@@ -20,5 +20,5 @@
 #-------------------------------------------------------------
 
 A = read($1, format=$2, rows=$3, cols=$4);
-print(nrow(A))
+print(nrow(A));
 write(A, $5, format=$2);

Reply via email to