Repository: incubator-systemml
Updated Branches:
  refs/heads/master 3a3c16599 -> 34a2e1d8a


[SYSTEMML-556] Simplified JSON metadata string construction for APIs

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8c4f83a1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8c4f83a1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8c4f83a1

Branch: refs/heads/master
Commit: 8c4f83a15491c8c5afa539102de188ac1fa5c25d
Parents: 3a3c165
Author: Matthias Boehm <[email protected]>
Authored: Fri Jun 24 20:19:22 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Jun 24 20:37:26 2016 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/OutputInfo.java   |  22 +++
 .../sysml/runtime/util/MapReduceTool.java       | 191 ++++++++-----------
 2 files changed, 98 insertions(+), 115 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8c4f83a1/src/main/java/org/apache/sysml/runtime/matrix/data/OutputInfo.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/OutputInfo.java b/src/main/java/org/apache/sysml/runtime/matrix/data/OutputInfo.java
index 4d4a975..3efbee3 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/OutputInfo.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/OutputInfo.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.sysml.parser.DataExpression;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import 
org.apache.sysml.runtime.matrix.mapred.CSVWriteReducer.RowBlockForTextOutput;
 import org.apache.sysml.runtime.matrix.sort.CompactOutputFormat;
@@ -140,6 +141,27 @@ public class OutputInfo implements Serializable
                        throw new DMLRuntimeException("Unrecognized outputInfo: 
" + oi);
        }
        
+       /**
+        * 
+        * @param oinfo
+        * @return
+        * @throws DMLRuntimeException 
+        */
+       public static String outputInfoToStringExternal(OutputInfo oinfo) 
+               throws DMLRuntimeException 
+       {
+               if( oinfo == OutputInfo.TextCellOutputInfo )
+                       return DataExpression.FORMAT_TYPE_VALUE_TEXT;
+               else if( oinfo == OutputInfo.MatrixMarketOutputInfo )
+                       return DataExpression.FORMAT_TYPE_VALUE_MATRIXMARKET;
+               else if( oinfo == OutputInfo.CSVOutputInfo )
+                       return DataExpression.FORMAT_TYPE_VALUE_CSV;
+               else if( oinfo == OutputInfo.BinaryBlockOutputInfo )
+                       return DataExpression.FORMAT_TYPE_VALUE_BINARY;
+               else
+                       throw new DMLRuntimeException("Unrecognized outputInfo: 
" + oinfo);
+       }
+       
        @Override
        public boolean equals( Object o ) 
        {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8c4f83a1/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index 1a11af9..77d5bd6 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -55,6 +55,7 @@ import 
org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData;
 import org.apache.sysml.runtime.matrix.data.OutputInfo;
 import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
 import org.apache.sysml.runtime.matrix.sort.ReadWithZeros;
+import org.apache.wink.json4j.JSONException;
 import org.apache.wink.json4j.OrderedJSONObject;
 
 
@@ -395,7 +396,6 @@ public class MapReduceTool
         }
         br.write(line.toString());
         br.close();
-        //System.out.println("Finished writing dimsFile: " + filename);
        }
        
        public static MatrixCharacteristics[] processDimsFiles(String dir, 
MatrixCharacteristics[] stats) 
@@ -461,136 +461,97 @@ public class MapReduceTool
                Path pt = new Path(mtdfile);
                FileSystem fs = FileSystem.get(_rJob);
                BufferedWriter br = new BufferedWriter(new 
OutputStreamWriter(fs.create(pt,true)));
-               formatProperties = (formatProperties==null && 
outinfo==OutputInfo.CSVOutputInfo) ?
-                               new CSVFileFormatProperties() : 
formatProperties;
-               OrderedJSONObject mtd = new OrderedJSONObject(); // maintain 
order in output file
 
                try {
-                       // build JSON metadata object
-                       mtd.put(DataExpression.DATATYPEPARAM, 
dt.toString().toLowerCase());
-                       if (schema == null)
-                               switch (vt) {
-                                       case DOUBLE:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "double");
-                                               break;
-                                       case INT:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "int");
-                                               break;
-                                       case BOOLEAN:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "boolean");
-                                               break;
-                                       case STRING:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "string");
-                                               break;
-                                       case UNKNOWN:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "unknown");
-                                               break;
-                                       case OBJECT:
-                                               
mtd.put(DataExpression.VALUETYPEPARAM, "object");
-                                               break;
-                               }
-                       else
-                       {
-                               StringBuffer schemaStrBuffer = new 
StringBuffer();
-                               for(int i=0; i < schema.size(); i++) {
-                                       switch (schema.get(i)) {
-                                               case DOUBLE:
-                                                       
schemaStrBuffer.append("DOUBLE");
-                                                       break;
-                                               case INT:
-                                                       
schemaStrBuffer.append("INT");
-                                                       break;
-                                               case BOOLEAN:
-                                                       
schemaStrBuffer.append("BOOLEAN");
-                                                       break;
-                                               case STRING:
-                                                       
schemaStrBuffer.append("STRING");
-                                                       break;
-                                               case UNKNOWN:
-                                               default:
-                                                       
schemaStrBuffer.append("*");
-                                                       break;
-                                       }
-                                       
schemaStrBuffer.append(DataExpression.DEFAULT_DELIM_DELIMITER);
-                               }
-                               mtd.put(DataExpression.SCHEMAPARAM, 
schemaStrBuffer.toString());
-                       }
-                       mtd.put(DataExpression.READROWPARAM, mc.getRows());
-                       mtd.put(DataExpression.READCOLPARAM, mc.getCols());
-                       // only output rows_in_block and cols_in_block for 
matrix binary format
-                       if (outinfo == OutputInfo.BinaryBlockOutputInfo && 
dt.isMatrix() ) {
-                               mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, 
mc.getRowsPerBlock());
-                               mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, 
mc.getColsPerBlock());
-                       }
-                       // only output nnz for matrix
-                       if( dt.isMatrix() ) {
-                               mtd.put(DataExpression.READNUMNONZEROPARAM, 
mc.getNonZeros());
-                       }
-                       if (outinfo == OutputInfo.TextCellOutputInfo) {
-                               mtd.put(DataExpression.FORMAT_TYPE, "text");
-                       } else if (outinfo == OutputInfo.BinaryBlockOutputInfo 
|| outinfo == OutputInfo.BinaryCellOutputInfo ) {
-                               mtd.put(DataExpression.FORMAT_TYPE, "binary");
-                       } else if (outinfo == OutputInfo.CSVOutputInfo) {
-                               mtd.put(DataExpression.FORMAT_TYPE, "csv");
-                       } else {
-                               mtd.put(DataExpression.FORMAT_TYPE, 
"specialized");
-                       }
-                       if (outinfo == OutputInfo.CSVOutputInfo) {
-                               CSVFileFormatProperties csvProperties = 
(CSVFileFormatProperties) formatProperties;
-                               mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, 
csvProperties.hasHeader());
-                               mtd.put(DataExpression.DELIM_DELIMITER, 
csvProperties.getDelim());
-                       }
-                       mtd.put(DataExpression.DESCRIPTIONPARAM,
-                                       new 
OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML"));
-
-                       // write metadata JSON object to file
-                       mtd.write(br, 4); // indent with 4 spaces
+                       String mtd = metaDataToString(mtdfile, vt, schema, dt, 
mc, outinfo, formatProperties);
+                       br.write(mtd);
                        br.close();
                } catch (Exception e) {
                        throw new IOException("Error creating and writing 
metadata JSON file", e);
                }
        }
 
-       public static void writeScalarMetaDataFile(String mtdfile, ValueType v) 
throws IOException {
-               Path pt=new Path(mtdfile);
+       public static void writeScalarMetaDataFile(String mtdfile, ValueType 
vt) 
+               throws IOException 
+       {
+               Path pt = new Path(mtdfile);
                FileSystem fs = FileSystem.get(_rJob);
-               BufferedWriter br=new BufferedWriter(new 
OutputStreamWriter(fs.create(pt,true)));
-               OrderedJSONObject mtd = new OrderedJSONObject(); // maintain 
order in output file
+               BufferedWriter br = new BufferedWriter(new 
OutputStreamWriter(fs.create(pt,true)));
 
                try {
-                       // build JSON metadata object
-                       mtd.put(DataExpression.DATATYPEPARAM, "scalar");
-                       switch (v) {
-                               case DOUBLE:
-                                       mtd.put(DataExpression.VALUETYPEPARAM, 
"double");
-                                       break;
-                               case INT:
-                                       mtd.put(DataExpression.VALUETYPEPARAM, 
"int");
-                                       break;
-                               case BOOLEAN:
-                                       mtd.put(DataExpression.VALUETYPEPARAM, 
"boolean");
-                                       break;
-                               case STRING:
-                                       mtd.put(DataExpression.VALUETYPEPARAM, 
"string");
-                                       break;
-                               case UNKNOWN:
-                                       mtd.put(DataExpression.VALUETYPEPARAM, 
"unknown");
-                                       break;
-                               case OBJECT:
-                                       throw new IOException("Write of generic 
object types not supported.");
-                       }
-                       mtd.put(DataExpression.FORMAT_TYPE, "text");
-                       mtd.put(DataExpression.DESCRIPTIONPARAM,
-                                       new 
OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML"));
-
-                       // write metadata JSON object to file
-                       mtd.write(br, 4); // indent with 4 spaces
+                       String mtd = metaDataToString(mtdfile, vt, null, 
+                               DataType.SCALAR, null, 
OutputInfo.TextCellOutputInfo, null);
+                       br.write(mtd);
                        br.close();
-               } catch (Exception e) {
+               } 
+               catch (Exception e) {
                        throw new IOException("Error creating and writing 
metadata JSON file", e);
                }
        }
        
+       /**
+        * 
+        * @param mtdfile
+        * @param vt
+        * @param schema
+        * @param dt
+        * @param mc
+        * @param outinfo
+        * @param formatProperties
+        * @return
+        * @throws JSONException 
+        * @throws DMLRuntimeException 
+        */
+       public static String metaDataToString(String mtdfile, ValueType vt, 
List<ValueType> schema, DataType dt, MatrixCharacteristics mc, 
+                       OutputInfo outinfo, FileFormatProperties 
formatProperties) throws JSONException, DMLRuntimeException
+       {
+               OrderedJSONObject mtd = new OrderedJSONObject(); // maintain 
order in output file
+
+               //handle data type and value types (incl schema for frames)
+               mtd.put(DataExpression.DATATYPEPARAM, 
dt.toString().toLowerCase());
+               if (schema == null) {
+                       mtd.put(DataExpression.VALUETYPEPARAM, 
vt.toString().toLowerCase());
+               }       
+               else {
+                       StringBuffer schemaSB = new StringBuffer();
+                       for(int i=0; i < schema.size(); i++) {
+                               if( schema.get(i) == ValueType.UNKNOWN )
+                                       schemaSB.append("*");
+                               else
+                                       
schemaSB.append(schema.get(i).toString());
+                               
schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER);
+                       }
+                       mtd.put(DataExpression.SCHEMAPARAM, 
schemaSB.toString());
+               }
+               
+               //handle output dimensions
+               if( !dt.isScalar() ) {
+                       mtd.put(DataExpression.READROWPARAM, mc.getRows());
+                       mtd.put(DataExpression.READCOLPARAM, mc.getCols());
+                       // handle output nnz and binary block configuration
+                       if( dt.isMatrix() ) {
+                               if (outinfo == OutputInfo.BinaryBlockOutputInfo 
) {
+                                       
mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, mc.getRowsPerBlock());
+                                       
mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, mc.getColsPerBlock());
+                               }
+                               mtd.put(DataExpression.READNUMNONZEROPARAM, 
mc.getNonZeros());
+                       }
+               }
+                       
+               //handle format type and additional arguments   
+               mtd.put(DataExpression.FORMAT_TYPE, 
OutputInfo.outputInfoToStringExternal(outinfo));
+               if (outinfo == OutputInfo.CSVOutputInfo) {
+                       CSVFileFormatProperties csvProperties = 
(formatProperties==null) ?
+                               new CSVFileFormatProperties() : 
(CSVFileFormatProperties)formatProperties;
+                       mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, 
csvProperties.hasHeader());
+                       mtd.put(DataExpression.DELIM_DELIMITER, 
csvProperties.getDelim());
+               }
+               mtd.put(DataExpression.DESCRIPTIONPARAM,
+                       new OrderedJSONObject().put(DataExpression.AUTHORPARAM, 
"SystemML"));
+
+               return mtd.toString(4); // indent with 4 spaces 
+       }
+       
        public static double[][] readMatrixFromHDFS(String dir, InputInfo 
inputinfo, long rlen, long clen, int brlen, int bclen) 
                throws IOException, DMLRuntimeException
        {

Reply via email to