[SYSTEMML-556] Extended jmlc api for reading frames (file/path), tests

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/14e898ac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/14e898ac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/14e898ac

Branch: refs/heads/master
Commit: 14e898acc4ac1dde839cc19342f0f3bd4fcab904
Parents: 2c19736
Author: Matthias Boehm <[email protected]>
Authored: Fri Apr 22 18:05:31 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Apr 22 21:51:09 2016 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/api/jmlc/Connection.java   | 150 +++++++++++++++++++
 .../sysml/runtime/io/FrameReaderTextCell.java   |  16 +-
 .../functions/jmlc/FrameReadMetaTest.java       |  25 +++-
 .../jmlc/tfmtd_frame_example/tfmtd_frame        |  26 ++++
 .../jmlc/tfmtd_frame_example/tfmtd_frame.mtd    |  10 ++
 5 files changed, 220 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/14e898ac/src/main/java/org/apache/sysml/api/jmlc/Connection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/Connection.java 
b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
index b384ca5..6a8dc45 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
@@ -55,6 +55,9 @@ import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysml.runtime.io.FrameReader;
+import org.apache.sysml.runtime.io.FrameReaderFactory;
+import org.apache.sysml.runtime.io.FrameReaderTextCell;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.io.MatrixReader;
 import org.apache.sysml.runtime.io.MatrixReaderFactory;
@@ -259,6 +262,10 @@ public class Connection
                return sb.toString();
        }
        
+       ////////////////////////////////////////////
+       // Read matrices
+       ////////////////////////////////////////////
+       
        /**
         * Reads an input matrix in arbitrary format from HDFS into a dense 
double array.
         * NOTE: this call currently only supports default configurations for 
CSV.
@@ -407,6 +414,149 @@ public class Connection
                return ret;
        }
        
+       ////////////////////////////////////////////
+       // Read frames
+       ////////////////////////////////////////////
+
+       /**
+        * Reads an input frame from HDFS into a dense string array, taking the 
number of rows, number of columns, and format from the frame's JSON meta data file.
+        * NOTE: this call currently only supports default configurations for 
CSV.
+        * 
+        * @param fname the filename of the input frame; the meta data file name is derived from it
+        * @return frame as a two-dimensional string array
+        * @throws IOException if reading fails; any exception raised is wrapped as the cause
+        */
+       public String[][] readStringFrame(String fname) 
+               throws IOException
+       {
+               try {
+                       //read the json meta data file whose name is derived from fname
+                       String fnamemtd = DataExpression.getMTDFileName(fname);
+                       JSONObject jmtd = new 
DataExpression().readMetadataFile(fnamemtd, false);
+                       
+                       //extract dimensions and input format from the json meta data
+                       long rows = jmtd.getLong(DataExpression.READROWPARAM);
+                       long cols = jmtd.getLong(DataExpression.READCOLPARAM);
+                       String format = 
jmtd.getString(DataExpression.FORMAT_TYPE);
+                       InputInfo iinfo = 
InputInfo.stringExternalToInputInfo(format);                  
+               
+                       //read frame file via the overload that takes explicit format and dimensions
+                       return readStringFrame(fname, iinfo, rows, cols);
+               }
+               catch(Exception ex) {
+                       throw new IOException(ex);
+               }
+       }
+       
+       /**
+        * Reads an input frame from HDFS into a dense string array, using the 
frame reader that FrameReaderFactory creates for the given input info.
+        * NOTE: this call currently only supports default configurations for 
CSV.
+        * 
+        * @param fname the filename of the input frame
+        * @param iinfo InputInfo object used to select the frame reader
+        * @param rows number of rows in the frame
+        * @param cols number of columns in the frame
+        * @return frame as a two-dimensional string array
+        * @throws IOException if reading or conversion fails; any exception raised is wrapped as the cause
+        */
+       public String[][] readStringFrame(String fname, InputInfo iinfo, long 
rows, long cols) 
+               throws IOException
+       {
+               try {
+                       FrameReader reader = 
FrameReaderFactory.createFrameReader(iinfo);
+                       FrameBlock mb = reader.readFrameFromHDFS(fname, rows, 
cols);
+                       return DataConverter.convertToStringFrame(mb);
+               }
+               catch(Exception ex) {
+                       throw new IOException(ex);
+               }
+       }
+       
+       /**
+        * Converts an input string representation of a frame in textcell format
+        * into a dense string array. The meta data string is the SystemML 
generated
+        * .mtd file content; its row count, column count, and format are read, and the format must be text or mm.
+        * 
+        * @param input string frame in textcell format
+        * @param meta string representing SystemML frame metadata in JSON 
format
+        * @return frame as a two-dimensional string array
+        * @throws IOException if the meta data cannot be parsed, the format is rejected, or conversion fails
+        */
+       public String[][] convertToStringFrame(String input, String meta) 
+               throws IOException
+       {
+               try {
+                       //parse json meta data (dimensions are read as int here)
+                       JSONObject jmtd = new JSONObject(meta);
+                       int rows = jmtd.getInt(DataExpression.READROWPARAM);
+                       int cols = jmtd.getInt(DataExpression.READCOLPARAM);
+                       String format = 
jmtd.getString(DataExpression.FORMAT_TYPE);
+       
+                       //sanity check input format (NOTE: the IOException thrown here is re-wrapped by the catch below)
+                       
if(!(DataExpression.FORMAT_TYPE_VALUE_TEXT.equals(format)
+                               
||DataExpression.FORMAT_TYPE_VALUE_MATRIXMARKET.equals(format))) {
+                               throw new IOException("Invalid input format 
(expected: text or mm): "+format);
+                       }
+                       
+                       //parse the input frame via the overload that takes explicit dimensions
+                       return convertToStringFrame(input, rows, cols);
+               }
+               catch(Exception ex) {
+                       throw new IOException(ex);
+               }
+       }
+       
+       /**
+        * Converts an input string representation of a frame in textcell format
+        * into a dense string array. The number of rows and columns needs to be 
+        * specified because textcell only represents non-zero values and hence
+        * does not define the dimensions in the general case.
+        * 
+        * @param input string frame in textcell format; turned into an InputStream and passed to the stream-based overload
+        * @param rows number of rows in the frame
+        * @param cols number of columns in the frame
+        * @return frame as a two-dimensional string array
+        * @throws IOException if the conversion fails
+        */
+       public String[][] convertToStringFrame(String input, int rows, int 
cols) 
+               throws IOException
+       {
+               InputStream is = IOUtilFunctions.toInputStream(input);
+               return convertToStringFrame(is, rows, cols);
+       }
+       
+       /**
+        * Converts an input stream of a string frame in textcell format
+        * into a dense string array. The number of rows and columns needs to be 
+        * specified because textcell only represents non-zero values and hence
+        * does not define the dimensions in the general case.
+        * 
+        * @param input InputStream to a string frame in textcell format
+        * @param rows number of rows in the frame
+        * @param cols number of columns in the frame
+        * @return frame as a two-dimensional string array
+        * @throws IOException if reading fails; a DMLRuntimeException is wrapped as the cause
+        */
+       public String[][] convertToStringFrame(InputStream input, int rows, int 
cols) 
+               throws IOException
+       {
+               String[][] ret = null;
+               
+               try {
+                       //read input frame with the textcell frame reader
+                       FrameReaderTextCell reader = 
(FrameReaderTextCell)FrameReaderFactory.createFrameReader(InputInfo.TextCellInputInfo);
+                       FrameBlock mb = reader.readFrameFromInputStream(input, 
rows, cols);
+               
+                       //convert to string array
+                       ret = DataConverter.convertToStringFrame( mb );
+               }
+               catch(DMLRuntimeException rex) {
+                       throw new IOException( rex );
+               }
+               
+               return ret;
+       }
+       
        
        ////////////////////////////////////////////
        // Read transform meta data

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/14e898ac/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java 
b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
index 91b7892..2f42a64 100644
--- a/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
+++ b/src/main/java/org/apache/sysml/runtime/io/FrameReaderTextCell.java
@@ -84,6 +84,20 @@ public class FrameReaderTextCell extends FrameReader
        /**
         * 
         * @param is
+        * @param rlen number of rows in the frame
+        * @param clen number of columns in the frame; also passed to getDefSchema and getDefColNames
+        * @return frame block produced by the overload that takes an explicit schema and column names
+        * @throws IOException
+        * @throws DMLRuntimeException
+        */
+       public FrameBlock readFrameFromInputStream(InputStream is, long rlen, 
long clen) 
+               throws IOException, DMLRuntimeException {
+               return readFrameFromInputStream(is, getDefSchema(clen), 
getDefColNames(clen), rlen, clen);
+       }
+       
+       /**
+        * 
+        * @param is
         * @param schema
         * @param names
         * @param rlen
@@ -93,7 +107,7 @@ public class FrameReaderTextCell extends FrameReader
         * @throws IOException 
         */
        public FrameBlock readFrameFromInputStream(InputStream is, 
List<ValueType> schema, List<String> names, long rlen, long clen) 
-                       throws IOException, DMLRuntimeException 
+               throws IOException, DMLRuntimeException 
        {
                //allocate output frame block
                FrameBlock ret = createOutputFrameBlock(schema, names, rlen);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/14e898ac/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
index 05beeed..1abe68a 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
@@ -30,6 +30,7 @@ import org.apache.sysml.api.jmlc.PreparedScript;
 import org.apache.sysml.api.jmlc.ResultVariables;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.runtime.matrix.data.FrameBlock;
+import org.apache.sysml.runtime.util.DataConverter;
 import org.apache.sysml.runtime.util.MapReduceTool;
 import org.apache.sysml.test.integration.AutomatedTestBase;
 import org.apache.sysml.test.integration.TestConfiguration;
@@ -54,22 +55,32 @@ public class FrameReadMetaTest extends AutomatedTestBase
        
        @Test
        public void testJMLCTransformDenseSpec() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, false, true);
+               runJMLCReadMetaTest(TEST_NAME1, false, false, true);
        }
        
        @Test
        public void testJMLCTransformDenseReuseSpec() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, true, true);
+               runJMLCReadMetaTest(TEST_NAME1, true, false, true);
        }
        
        @Test
        public void testJMLCTransformDense() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, false, false);
+               runJMLCReadMetaTest(TEST_NAME1, false, false, false);
        }
        
        @Test
        public void testJMLCTransformDenseReuse() throws IOException {
-               runJMLCReadMetaTest(TEST_NAME1, true, false);
+               runJMLCReadMetaTest(TEST_NAME1, true, false, false);
+       }
+       
+       @Test
+       public void testJMLCTransformDenseReadFrame() throws IOException {
+               runJMLCReadMetaTest(TEST_NAME1, false, true, false);
+       }
+       
+       @Test
+       public void testJMLCTransformDenseReuseReadFrame() throws IOException {
+               runJMLCReadMetaTest(TEST_NAME1, true, true, false);
        }
 
        /**
@@ -79,7 +90,7 @@ public class FrameReadMetaTest extends AutomatedTestBase
         * @param instType
         * @throws IOException 
         */
-       private void runJMLCReadMetaTest( String testname, boolean modelReuse, 
boolean useSpec ) 
+       private void runJMLCReadMetaTest( String testname, boolean modelReuse, 
boolean readFrame, boolean useSpec ) 
                throws IOException
        {       
                String TEST_NAME = testname;
@@ -92,7 +103,9 @@ public class FrameReadMetaTest extends AutomatedTestBase
                
                //read meta data frame
                String spec = MapReduceTool.readStringFromHDFSFile(SCRIPT_DIR + 
TEST_DIR+"tfmtd_example/spec.json");
-               FrameBlock M = conn.readTransformMetaDataFromFile(useSpec ? 
spec : null, SCRIPT_DIR + TEST_DIR+"tfmtd_example/");
+               FrameBlock M = readFrame ?
+                               
DataConverter.convertToFrameBlock(conn.readStringFrame(SCRIPT_DIR + 
TEST_DIR+"tfmtd_frame_example/tfmtd_frame")) : 
+                               conn.readTransformMetaDataFromFile(useSpec ? 
spec : null, SCRIPT_DIR + TEST_DIR+"tfmtd_example/");
                
                //generate data based on recode maps
                HashMap<String,Long>[] RC = getRecodeMaps(M);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/14e898ac/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame 
b/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame
new file mode 100644
index 0000000..60a3c18
--- /dev/null
+++ b/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame
@@ -0,0 +1,26 @@
+1 1 91312·1
+1 2 east·1
+1 4 1·1
+1 5 1·1
+1 6 1·1
+1 7 FALSE·1
+2 1 94555·2
+2 2 north·2
+2 4 2·2
+2 5 1.5·2
+2 6 2·2
+2 7 TRUE·2
+3 1 95141·3
+3 2 south·3
+3 4 3·3
+3 5 2·3
+3 6 3·3
+4 1 96334·4
+4 2 west·4
+4 4 4·4
+4 5 2.5·4
+5 1 98755·5
+5 4 5·5
+5 5 3·5
+6 4 6·6
+7 4 7·7

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/14e898ac/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame.mtd
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame.mtd 
b/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame.mtd
new file mode 100644
index 0000000..bc8b601
--- /dev/null
+++ b/src/test/scripts/functions/jmlc/tfmtd_frame_example/tfmtd_frame.mtd
@@ -0,0 +1,10 @@
+{
+    "data_type": "frame",
+    "value_type": "string",
+    "rows": 7,
+    "cols": 9,
+    "format": "text",
+    "description": {
+        "author": "SystemML"
+    }
+}
\ No newline at end of file

Reply via email to