This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new 84ef713 [SYSTEMDS-52] Fix libsvm reader/writer integration and
correctness
84ef713 is described below
commit 84ef71326c6781bad4ed9b39a210ee2cd4a6d4bd
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Apr 11 23:12:37 2020 +0200
[SYSTEMDS-52] Fix libsvm reader/writer integration and correctness
This patch fixes a correctness issue of the libsvm local writers, which
incorrectly shifted the output indexes twice for space inputs.
Furthermore, the libsvm local readers were not fully integrated in all
code path yet.
The distributed libsvm readers/writers still remain to be integrated.
---
docs/Tasks.txt | 4 +-
.../sysds/runtime/io/MatrixReaderFactory.java | 69 ++++++++++------------
.../sysds/runtime/io/ReaderTextLIBSVMParallel.java | 2 +-
.../apache/sysds/runtime/io/WriterTextLIBSVM.java | 12 ++--
.../test/functions/data/misc/NoRenameTest.java | 48 +++++++--------
src/test/scripts/functions/data/NoRenameTest1.dml | 2 +-
6 files changed, 65 insertions(+), 72 deletions(-)
diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 7bb0c10..cfcab1a 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -47,8 +47,8 @@ SYSTEMDS-40 Preprocessing builtins
SYSTEMDS-50 I/O Formats
* 51 Support for homogeneous JSON (local/distributed)
- * 52 Support for libsvm files (local/distributed)
- * 53 New sql data source (local, distributed)
+ * 52 Support for libsvm files (local/distributed)
+ * 53 New sql data source (local, distributed)
* 54 Support for is.na, is.nan, is.infinite OK
SYSTEMDS-60 Update SystemML improvements
diff --git a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
index 3d2af34..168a336 100644
--- a/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/io/MatrixReaderFactory.java
@@ -28,36 +28,34 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
public class MatrixReaderFactory
{
-
- public static MatrixReader createMatrixReader( InputInfo iinfo )
+ public static MatrixReader createMatrixReader(InputInfo iinfo)
{
MatrixReader reader = null;
+ boolean par =
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS);
+ boolean mcsr = MatrixBlock.DEFAULT_SPARSEBLOCK ==
SparseBlock.Type.MCSR;
- if( iinfo == InputInfo.TextCellInputInfo || iinfo ==
InputInfo.MatrixMarketInputInfo )
- {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderTextCellParallel( iinfo );
- else
- reader = new ReaderTextCell( iinfo );
+ if( iinfo == InputInfo.TextCellInputInfo || iinfo ==
InputInfo.MatrixMarketInputInfo ) {
+ reader = (par & mcsr) ?
+ new ReaderTextCellParallel(iinfo) : new
ReaderTextCell(iinfo);
}
- else if( iinfo == InputInfo.CSVInputInfo )
- {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderTextCSVParallel(new
FileFormatPropertiesCSV());
- else
- reader = new ReaderTextCSV(new
FileFormatPropertiesCSV());
+ else if( iinfo == InputInfo.CSVInputInfo ) {
+ reader = (par & mcsr) ?
+ new ReaderTextCSVParallel(new
FileFormatPropertiesCSV()) :
+ new ReaderTextCSV(new
FileFormatPropertiesCSV());
+ }
+ else if( iinfo == InputInfo.LIBSVMInputInfo) {
+ reader = (par & mcsr) ?
+ new ReaderTextLIBSVMParallel() : new
ReaderTextLIBSVM();
}
else if( iinfo == InputInfo.BinaryCellInputInfo )
reader = new ReaderBinaryCell();
else if( iinfo == InputInfo.BinaryBlockInputInfo ) {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderBinaryBlockParallel( false );
- else
- reader = new ReaderBinaryBlock( false );
+ reader = (par & mcsr) ?
+ new ReaderBinaryBlockParallel(false) : new
ReaderBinaryBlock(false);
}
else {
throw new DMLRuntimeException("Failed to create matrix
reader for unknown input info: "
- +
InputInfo.inputInfoToString(iinfo));
+ + InputInfo.inputInfoToString(iinfo));
}
return reader;
@@ -71,36 +69,33 @@ public class MatrixReaderFactory
MatrixReader reader = null;
InputInfo iinfo = props.inputInfo;
-
+ boolean par =
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS);
+ boolean mcsr = MatrixBlock.DEFAULT_SPARSEBLOCK ==
SparseBlock.Type.MCSR;
+
if( iinfo == InputInfo.TextCellInputInfo || iinfo ==
InputInfo.MatrixMarketInputInfo ) {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderTextCellParallel( iinfo );
- else
- reader = new ReaderTextCell( iinfo );
+ reader = (par & mcsr) ?
+ new ReaderTextCellParallel(iinfo) : new
ReaderTextCell(iinfo);
}
else if( iinfo == InputInfo.CSVInputInfo ) {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderTextCSVParallel(
props.formatProperties!=null ? (FileFormatPropertiesCSV)props.formatProperties
: new FileFormatPropertiesCSV());
- else
- reader = new ReaderTextCSV(
props.formatProperties!=null ? (FileFormatPropertiesCSV)props.formatProperties
: new FileFormatPropertiesCSV());
+ reader = (par & mcsr) ?
+ new ReaderTextCSVParallel(
props.formatProperties!=null ?
+
(FileFormatPropertiesCSV)props.formatProperties : new
FileFormatPropertiesCSV()) :
+ new ReaderTextCSV( props.formatProperties!=null
?
+
(FileFormatPropertiesCSV)props.formatProperties : new
FileFormatPropertiesCSV());
}
else if( iinfo == InputInfo.LIBSVMInputInfo) {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderTextLIBSVMParallel();
- else
- reader = new ReaderTextLIBSVM();
+ reader = (par & mcsr) ?
+ new ReaderTextLIBSVMParallel() : new
ReaderTextLIBSVM();
}
else if( iinfo == InputInfo.BinaryCellInputInfo )
reader = new ReaderBinaryCell();
else if( iinfo == InputInfo.BinaryBlockInputInfo ) {
- if(
ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
&& MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR )
- reader = new ReaderBinaryBlockParallel(
props.localFS );
- else
- reader = new ReaderBinaryBlock( props.localFS );
+ reader = (par & mcsr) ?
+ new ReaderBinaryBlockParallel(props.localFS) :
new ReaderBinaryBlock(props.localFS);
}
else {
throw new DMLRuntimeException("Failed to create matrix
reader for unknown input info: "
- +
InputInfo.inputInfoToString(iinfo));
+ + InputInfo.inputInfoToString(iinfo));
}
return reader;
diff --git
a/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
b/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
index c465c07..76fff10 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextLIBSVMParallel.java
@@ -297,7 +297,7 @@ public class ReaderTextLIBSVMParallel extends MatrixReader
long rlen, long clen, int splitCount)
{
_split = split;
- _splitoffsets = offsets; // new
SplitOffsetInfos(offsets);
+ _splitoffsets = offsets;
_informat = informat;
_job = job;
_dest = dest;
diff --git a/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
b/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
index 71521fd..f747b65 100644
--- a/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
+++ b/src/main/java/org/apache/sysds/runtime/io/WriterTextLIBSVM.java
@@ -38,7 +38,6 @@ public class WriterTextLIBSVM extends MatrixWriter
}
- @SuppressWarnings("resource")
@Override
public final void writeMatrixToHDFS(MatrixBlock src, String fname, long
rlen, long clen, int blen, long nnz, boolean diag)
throws IOException, DMLRuntimeException
@@ -99,7 +98,6 @@ public class WriterTextLIBSVM extends MatrixWriter
double label = (sblock!=null) ?
sblock.get(i, clen-1) : 0;
sb.append(label);
- sb.append(IOUtilFunctions.LIBSVM_DELIM);
if( sblock!=null && i<sblock.numRows()
&& !sblock.isEmpty(i) ) {
int pos = sblock.pos(i);
@@ -108,9 +106,10 @@ public class WriterTextLIBSVM extends MatrixWriter
double[] avals =
sblock.values(i);
// append sparse row
for( int k=pos; k<pos+alen; k++
) {
- if( aix[k] != clen-1 )
-
appendIndexValLibsvm(sb, aix[k]+1, avals[k]);
-
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+ if( aix[k]!=clen-1 ) {
+
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+
appendIndexValLibsvm(sb, aix[k], avals[k]);
+ }
}
}
// write the string row
@@ -125,14 +124,13 @@ public class WriterTextLIBSVM extends MatrixWriter
// append the class label as the 1st
column
double label =
src.getValueDenseUnsafe(i, clen-1);
sb.append(label);
- sb.append(IOUtilFunctions.LIBSVM_DELIM);
// append dense row
for( int j=0; j<clen-1; j++ ) {
double val =
src.getValueDenseUnsafe(i, j);
if( val != 0 ) {
-
appendIndexValLibsvm(sb, j, val);
sb.append(IOUtilFunctions.LIBSVM_DELIM);
+
appendIndexValLibsvm(sb, j, val);
}
}
// write the string row
diff --git
a/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
b/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
index 619a325..d78041a 100644
--- a/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/data/misc/NoRenameTest.java
@@ -87,17 +87,17 @@ public class NoRenameTest extends AutomatedTestBase
public void testTextmmSparseSinglenode() {
runRenameTest("mm", true, ExecMode.SINGLE_NODE);
}
-//
-// @Test
-// public void testTextlibsvmDenseSinglenode() {
-// runRenameTest("libsvm", false, ExecMode.SINGLE_NODE);
-// }
-//
-// @Test
-// public void testTextlibsvmSparseSinglenode() {
-// runRenameTest("libsvm", true, ExecMode.SINGLE_NODE);
-// }
-//
+
+ @Test
+ public void testTextlibsvmDenseSinglenode() {
+ runRenameTest("libsvm", false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTextlibsvmSparseSinglenode() {
+ runRenameTest("libsvm", true, ExecMode.SINGLE_NODE);
+ }
+
@Test
public void testBinaryDenseSinglenode() {
runRenameTest("binary", false, ExecMode.SINGLE_NODE);
@@ -137,16 +137,16 @@ public class NoRenameTest extends AutomatedTestBase
public void testTextmmSparseHybrid() {
runRenameTest("mm", true, ExecMode.HYBRID);
}
-//
-// @Test
-// public void testTextlibsvmDenseHybrid() {
-// runRenameTest("libsvm", false, ExecMode.HYBRID);
-// }
-//
-// @Test
-// public void testTextlibsvmSparseHybrid() {
-// runRenameTest("libsvm", true, ExecMode.HYBRID);
-// }
+
+ @Test
+ public void testTextlibsvmDenseHybrid() {
+ runRenameTest("libsvm", false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTextlibsvmSparseHybrid() {
+ runRenameTest("libsvm", true, ExecMode.HYBRID);
+ }
@Test
public void testBinaryDenseHybrid() {
@@ -187,7 +187,7 @@ public class NoRenameTest extends AutomatedTestBase
public void testTextmmSparseSpark() {
runRenameTest("mm", true, ExecMode.SPARK);
}
-//
+
// @Test
// public void testTextlibsvmDenseSpark() {
// runRenameTest("libsvm", false, ExecMode.SPARK);
@@ -197,7 +197,7 @@ public class NoRenameTest extends AutomatedTestBase
// public void testTextlibsvmSparseSpark() {
// runRenameTest("libsvm", true, ExecMode.SPARK);
// }
-
+//
@Test
public void testBinaryDenseSpark() {
runRenameTest("binary", false, ExecMode.SPARK);
@@ -251,4 +251,4 @@ public class NoRenameTest extends AutomatedTestBase
rtplatform = platformOld;
}
}
-}
\ No newline at end of file
+}
diff --git a/src/test/scripts/functions/data/NoRenameTest1.dml
b/src/test/scripts/functions/data/NoRenameTest1.dml
index 8a8956d..e6d4a8d 100644
--- a/src/test/scripts/functions/data/NoRenameTest1.dml
+++ b/src/test/scripts/functions/data/NoRenameTest1.dml
@@ -20,5 +20,5 @@
#-------------------------------------------------------------
A = read($1, format=$2, rows=$3, cols=$4);
-print(nrow(A))
+print(nrow(A));
write(A, $5, format=$2);