Repository: incubator-systemml Updated Branches: refs/heads/master c04fc99fc -> e455a5599
[SYSTEMML-477] Updating the format and generation logic of the JSON metadata file. Updating the formatting and logic for JSON metadata file generation by replacing the old, custom string concatenation code with JSON library calls. Using an OrderedJSONObject in order to maintain consistent metadata ordering for the user's benefit. Closes #48. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e455a559 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e455a559 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e455a559 Branch: refs/heads/master Commit: e455a5599781e7cf6c89c6232668a2746bd6dba6 Parents: c04fc99 Author: Mike Dusenberry <[email protected]> Authored: Wed Jan 20 17:20:46 2016 -0800 Committer: Mike Dusenberry <[email protected]> Committed: Wed Jan 20 17:20:46 2016 -0800 ---------------------------------------------------------------------- .../org/apache/sysml/parser/DataExpression.java | 5 +- .../sysml/runtime/util/MapReduceTool.java | 213 +++++++++---------- 2 files changed, 103 insertions(+), 115 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e455a559/src/main/java/org/apache/sysml/parser/DataExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java index fbaf6a2..29ffb18 100644 --- a/src/main/java/org/apache/sysml/parser/DataExpression.java +++ b/src/main/java/org/apache/sysml/parser/DataExpression.java @@ -78,8 +78,9 @@ public class DataExpression extends DataIdentifier public static final String COLUMNBLOCKCOUNTPARAM = "cols_in_block"; public static final String DATATYPEPARAM = "data_type"; public static final String VALUETYPEPARAM = "value_type"; - public static final String DESCRIPTIONPARAM = "description"; - + public static final String DESCRIPTIONPARAM = "description"; + public static final String AUTHORPARAM = "author"; + // Parameter names relevant to reading/writing delimited/csv files public static final String DELIM_DELIMITER = "sep"; public static final String DELIM_HAS_HEADER_ROW = "header"; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e455a559/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java index 8f5eb9d..be182cd 100644 --- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java +++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java @@ -53,6 +53,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData; import org.apache.sysml.runtime.matrix.data.OutputInfo; import org.apache.sysml.runtime.matrix.sort.ReadWithZeros; +import org.apache.wink.json4j.OrderedJSONObject; public class MapReduceTool @@ -439,123 +440,109 @@ public class MapReduceTool public static void writeMetaDataFile ( String mtdfile, ValueType v, MatrixCharacteristics mc, OutputInfo outinfo) throws IOException { writeMetaDataFile(mtdfile, v, mc, outinfo, null); } - - public static void writeMetaDataFile( String mtdfile, ValueType v, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties ) - throws IOException - { + + public static void writeMetaDataFile(String mtdfile, ValueType v, MatrixCharacteristics mc, OutputInfo outinfo, + FileFormatProperties formatProperties) throws IOException { Path pt = new Path(mtdfile); - FileSystem fs = FileSystem.get(_rJob); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); - formatProperties = (formatProperties==null && outinfo==OutputInfo.CSVOutputInfo) ? - new CSVFileFormatProperties() : formatProperties; + FileSystem fs = FileSystem.get(_rJob); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); + formatProperties = (formatProperties==null && outinfo==OutputInfo.CSVOutputInfo) ? + new CSVFileFormatProperties() : formatProperties; + OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file - String line = ""; - - try { - line += "{ \n" + - " \"" + DataExpression.DATATYPEPARAM + "\": \"matrix\"\n" + - " ,\"" + DataExpression.VALUETYPEPARAM + "\": "; - - switch (v) { - case DOUBLE: - line += "\"double\"\n"; - break; - case INT: - line += "\"int\"\n"; - break; - case BOOLEAN: - line += "\"boolean\"\n"; - break; - case STRING: - line += "\"string\"\n"; - break; - case UNKNOWN: - line += "\"unknown\"\n"; - break; - case OBJECT: - line += "\"object\"\n"; - break; - }; - - line += - " ,\"" + DataExpression.READROWPARAM + "\": " + mc.getRows() + "\n" + - " ,\"" + DataExpression.READCOLPARAM + "\": " + mc.getCols() + "\n"; - // only output rows_in_block and cols_in_block for binary format - if ( outinfo == OutputInfo.BinaryBlockOutputInfo) { - line += " ,\"" + DataExpression.ROWBLOCKCOUNTPARAM + "\": " + mc.getRowsPerBlock() + "\n" + - " ,\"" + DataExpression.COLUMNBLOCKCOUNTPARAM + "\": " + mc.getColsPerBlock() + "\n"; - } - - line += " ,\"" + DataExpression.READNUMNONZEROPARAM + "\": " + mc.getNonZeros() + "\n" + - " ,\"" + DataExpression.FORMAT_TYPE + "\": "; - - if ( outinfo == OutputInfo.TextCellOutputInfo ) { - line += "\"text\"\n"; - } else if (outinfo == OutputInfo.BinaryBlockOutputInfo || outinfo == OutputInfo.BinaryCellOutputInfo ) { - line += "\"binary\"\n"; // currently, there is no way to differentiate between them - } else if (outinfo == OutputInfo.CSVOutputInfo ) { - line += "\"csv\"\n"; - } else { - line += "\"specialized\"\n"; - } - - if ( outinfo == OutputInfo.CSVOutputInfo) { - CSVFileFormatProperties csvProperties = (CSVFileFormatProperties) formatProperties; - line += " ,\"" + DataExpression.DELIM_HAS_HEADER_ROW + "\": " + csvProperties.hasHeader() + "\n"; - line += " ,\"" + DataExpression.DELIM_DELIMITER + "\": \"" + csvProperties.getDelim() + "\"\n"; - } - - line += " ,\"description\": { \"author\": \"SystemML\" } \n" + "}" ; - - br.write(line); - - br.close(); - }catch (Exception e) { - throw new IOException(e); + try { + // build JSON metadata object + mtd.put(DataExpression.DATATYPEPARAM, "matrix"); + switch (v) { + case DOUBLE: + mtd.put(DataExpression.VALUETYPEPARAM, "double"); + break; + case INT: + mtd.put(DataExpression.VALUETYPEPARAM, "int"); + break; + case BOOLEAN: + mtd.put(DataExpression.VALUETYPEPARAM, "boolean"); + break; + case STRING: + mtd.put(DataExpression.VALUETYPEPARAM, "string"); + break; + case UNKNOWN: + mtd.put(DataExpression.VALUETYPEPARAM, "unknown"); + break; + case OBJECT: + mtd.put(DataExpression.VALUETYPEPARAM, "object"); + break; + } + mtd.put(DataExpression.READROWPARAM, mc.getRows()); + mtd.put(DataExpression.READCOLPARAM, mc.getCols()); + // only output rows_in_block and cols_in_block for binary format + if (outinfo == OutputInfo.BinaryBlockOutputInfo) { + mtd.put(DataExpression.ROWBLOCKCOUNTPARAM, mc.getRowsPerBlock()); + mtd.put(DataExpression.COLUMNBLOCKCOUNTPARAM, mc.getColsPerBlock()); + } + mtd.put(DataExpression.READNUMNONZEROPARAM, mc.getNonZeros()); + if (outinfo == OutputInfo.TextCellOutputInfo) { + mtd.put(DataExpression.FORMAT_TYPE, "text"); + } else if (outinfo == OutputInfo.BinaryBlockOutputInfo || outinfo == OutputInfo.BinaryCellOutputInfo ) { + mtd.put(DataExpression.FORMAT_TYPE, "binary"); + } else if (outinfo == OutputInfo.CSVOutputInfo) { + mtd.put(DataExpression.FORMAT_TYPE, "csv"); + } else { + mtd.put(DataExpression.FORMAT_TYPE, "specialized"); + } + if (outinfo == OutputInfo.CSVOutputInfo) { + CSVFileFormatProperties csvProperties = (CSVFileFormatProperties) formatProperties; + mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader()); + mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim()); + } + mtd.put(DataExpression.DESCRIPTIONPARAM, + new OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML")); + + // write metadata JSON object to file + mtd.write(br, 4); // indent with 4 spaces + br.close(); + } catch (Exception e) { + throw new IOException("Error creating and writing metadata JSON file", e); } } - - - public static void writeScalarMetaDataFile ( String mtdfile, ValueType v ) throws IOException { - - Path pt=new Path(mtdfile); - FileSystem fs = FileSystem.get(_rJob); - BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); - - try { - String line = ""; - line += "{ \n" + - " \"" + DataExpression.DATATYPEPARAM + "\": \"scalar\"\n" + - " ,\"" + DataExpression.VALUETYPEPARAM + "\": "; - - switch (v) { - case DOUBLE: - line += "\"double\"\n"; - break; - case INT: - line += "\"int\"\n"; - break; - case BOOLEAN: - line += "\"boolean\"\n"; - break; - case STRING: - line += "\"string\"\n"; - break; - case UNKNOWN: - line += "\"unknown\"\n"; - break; - case OBJECT: - throw new IOException("Write of generic object types not supported."); - }; - - line += " ,\"" + DataExpression.FORMAT_TYPE + "\": \"text\"\n" + - " ,\"description\": { \"author\": \"SystemML\" } \n" +" }" ; - - br.write(line); - - br.close(); - }catch (Exception e) { - throw new IOException(e); + + public static void writeScalarMetaDataFile(String mtdfile, ValueType v) throws IOException { + Path pt=new Path(mtdfile); + FileSystem fs = FileSystem.get(_rJob); + BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); + OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file + + try { + // build JSON metadata object + mtd.put(DataExpression.DATATYPEPARAM, "scalar"); + switch (v) { + case DOUBLE: + mtd.put(DataExpression.VALUETYPEPARAM, "double"); + break; + case INT: + mtd.put(DataExpression.VALUETYPEPARAM, "int"); + break; + case BOOLEAN: + mtd.put(DataExpression.VALUETYPEPARAM, "boolean"); + break; + case STRING: + mtd.put(DataExpression.VALUETYPEPARAM, "string"); + break; + case UNKNOWN: + mtd.put(DataExpression.VALUETYPEPARAM, "unknown"); + break; + case OBJECT: + throw new IOException("Write of generic object types not supported."); + } + mtd.put(DataExpression.FORMAT_TYPE, "text"); + mtd.put(DataExpression.DESCRIPTIONPARAM, + new OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML")); + + // write metadata JSON object to file + mtd.write(br, 4); // indent with 4 spaces + br.close(); + } catch (Exception e) { + throw new IOException("Error creating and writing metadata JSON file", e); } }
