Repository: incubator-systemml Updated Branches: refs/heads/master 578e595fd -> e2492fb61
[SYSTEMML-527] Write function description parameter Add description parameter to write function to allow writing description to metadata. Additionally set author to be user name if available and add a created timestamp. Closes #364. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e2492fb6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e2492fb6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e2492fb6 Branch: refs/heads/master Commit: e2492fb61665fe55a0ccee34116c10bf84f38fbf Parents: 578e595 Author: Deron Eriksson <[email protected]> Authored: Sat Feb 4 10:19:11 2017 -0800 Committer: Deron Eriksson <[email protected]> Committed: Sat Feb 4 10:19:11 2017 -0800 ---------------------------------------------------------------------- src/main/java/org/apache/sysml/lops/Data.java | 14 ++++++++- .../org/apache/sysml/parser/DataExpression.java | 7 ++++- .../apache/sysml/parser/OutputStatement.java | 3 +- .../instructions/cp/VariableCPInstruction.java | 18 +++++++++--- .../instructions/spark/WriteSPInstruction.java | 15 +++++++++- .../matrix/data/FileFormatProperties.java | 14 +++++++-- .../sysml/runtime/util/MapReduceTool.java | 31 ++++++++++++++++---- 7 files changed, 86 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/lops/Data.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/Data.java b/src/main/java/org/apache/sysml/lops/Data.java index 2cf125e..8e723c4 100644 --- a/src/main/java/org/apache/sysml/lops/Data.java +++ b/src/main/java/org/apache/sysml/lops/Data.java @@ -494,7 +494,19 @@ public class Data extends Lop } } - + + if (operation == OperationTypes.WRITE) { + sb.append(OPERAND_DELIMITOR); + Lop descriptionLop = getInputParams().get(DataExpression.DESCRIPTIONPARAM); + if (descriptionLop != null) { + boolean descLiteral = (descriptionLop instanceof Data && ((Data) descriptionLop).isLiteral()); + sb.append(prepOperand(descriptionLop.getOutputParameters().getLabel(), DataType.SCALAR, + ValueType.STRING, descLiteral)); + } else { + sb.append(prepOperand("", DataType.SCALAR, ValueType.STRING, true)); + } + } + return sb.toString(); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/DataExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java index cd9a862..9370bdd 100644 --- a/src/main/java/org/apache/sysml/parser/DataExpression.java +++ b/src/main/java/org/apache/sysml/parser/DataExpression.java @@ -78,6 +78,7 @@ public class DataExpression extends DataIdentifier public static final String DESCRIPTIONPARAM = "description"; public static final String AUTHORPARAM = "author"; public static final String SCHEMAPARAM = "schema"; + public static final String CREATEDPARAM = "created"; // Parameter names relevant to reading/writing delimited/csv files public static final String DELIM_DELIMITER = "sep"; @@ -100,6 +101,7 @@ public class DataExpression extends DataIdentifier public static final String[] READ_VALID_MTD_PARAM_NAMES = { IO_FILENAME, READROWPARAM, READCOLPARAM, READNUMNONZEROPARAM, FORMAT_TYPE, ROWBLOCKCOUNTPARAM, COLUMNBLOCKCOUNTPARAM, DATATYPEPARAM, VALUETYPEPARAM, SCHEMAPARAM, DESCRIPTIONPARAM, + AUTHORPARAM, CREATEDPARAM, // Parameters related to delimited/csv files. DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS }; @@ -1836,7 +1838,10 @@ public class DataExpression extends DataIdentifier { // if the read method does not specify parameter value, then add MTD metadata file value to parameter list if (getVarParam(key.toString()) == null){ - if ( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) { + if (( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) && + ( !key.toString().equalsIgnoreCase(AUTHORPARAM) ) && + ( !key.toString().equalsIgnoreCase(CREATEDPARAM) ) ) + { StringIdentifier strId = new StringIdentifier(val.toString(), this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn()); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/OutputStatement.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/OutputStatement.java b/src/main/java/org/apache/sysml/parser/OutputStatement.java index d42432c..00859be 100644 --- a/src/main/java/org/apache/sysml/parser/OutputStatement.java +++ b/src/main/java/org/apache/sysml/parser/OutputStatement.java @@ -35,7 +35,8 @@ public class OutputStatement extends Statement DataExpression.FORMAT_TYPE, DataExpression.DELIM_DELIMITER, DataExpression.DELIM_HAS_HEADER_ROW, - DataExpression.DELIM_SPARSE}; + DataExpression.DELIM_SPARSE, + DataExpression.DESCRIPTIONPARAM}; public DataIdentifier getIdentifier(){ return _id; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java index d5ce3f7..78fe330 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java @@ -101,6 +101,7 @@ public class VariableCPInstruction extends CPInstruction private CPOperand input1; private CPOperand input2; private CPOperand input3; + private CPOperand input4; private CPOperand output; private MetaData metadata; private UpdateType _updateType; @@ -274,15 +275,15 @@ public class VariableCPInstruction extends CPInstruction else if ( voc == VariableOperationCode.Write ) { // All write instructions have 3 parameters, except in case of delimited/csv file. // Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse) - if ( parts.length != 4 && parts.length != 7 ) - throw new DMLRuntimeException("Invalid number of operands in createvar instruction: " + str); + if ( parts.length != 5 && parts.length != 8 ) + throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str); } else { _arity = getArity(voc); InstructionUtils.checkNumFields ( parts, _arity ); // no output } - CPOperand in1=null, in2=null, in3=null, out=null; + CPOperand in1=null, in2=null, in3=null, in4=null, out=null; switch (voc) { @@ -413,6 +414,13 @@ public class VariableCPInstruction extends CPInstruction boolean sparse = Boolean.parseBoolean(parts[6]); FileFormatProperties formatProperties = new CSVFileFormatProperties(hasHeader, delim, sparse); inst.setFormatProperties(formatProperties); + in4 = new CPOperand(parts[7]); // description + inst.input4 = in4; + } else { + FileFormatProperties ffp = new FileFormatProperties(); + inst.setFormatProperties(ffp); + in4 = new CPOperand(parts[4]); // description + inst.input4 = in4; } return inst; @@ -745,6 +753,8 @@ public class VariableCPInstruction extends CPInstruction { //get filename (literal or variable expression) String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue(); + String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue(); + _formatProperties.setDescription(desc); if( input1.getDataType() == DataType.SCALAR ) { writeScalarToHDFS(ec, fname); @@ -758,7 +768,7 @@ public class VariableCPInstruction extends CPInstruction else { // Default behavior MatrixObject mo = ec.getMatrixObject(input1.getName()); - mo.exportData(fname, outFmt); + mo.exportData(fname, outFmt, _formatProperties); } } else if( input1.getDataType() == DataType.FRAME ) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java index 431ff24..912dbe3 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java @@ -55,6 +55,7 @@ public class WriteSPInstruction extends SPInstruction private CPOperand input1 = null; private CPOperand input2 = null; private CPOperand input3 = null; + private CPOperand input4 = null; private FileFormatProperties formatProperties; //scalars might occur for transform @@ -82,7 +83,7 @@ public class WriteSPInstruction extends SPInstruction // All write instructions have 3 parameters, except in case of delimited/csv file. // Write instructions for csv files also include three additional parameters (hasHeader, delimiter, sparse) - if ( parts.length != 4 && parts.length != 8 ) { + if ( parts.length != 5 && parts.length != 9 ) { throw new DMLRuntimeException("Invalid number of operands in write instruction: " + str); } @@ -103,6 +104,15 @@ public class WriteSPInstruction extends SPInstruction boolean isInputMB = Boolean.parseBoolean(parts[7]); inst.setInputMatrixBlock(isInputMB); + + CPOperand in4 = new CPOperand(parts[8]); + inst.input4 = in4; + } else { + FileFormatProperties ffp = new FileFormatProperties(); + + CPOperand in4 = new CPOperand(parts[4]); + inst.input4 = in4; + inst.setFormatProperties(ffp); } return inst; } @@ -132,6 +142,9 @@ public class WriteSPInstruction extends SPInstruction //get filename (literal or variable expression) String fname = ec.getScalarInput(input2.getName(), ValueType.STRING, input2.isLiteral()).getStringValue(); + String desc = ec.getScalarInput(input4.getName(), ValueType.STRING, input4.isLiteral()).getStringValue(); + formatProperties.setDescription(desc); + ValueType[] schema = (input1.getDataType()==DataType.FRAME) ? sec.getFrameObject(input1.getName()).getSchema() : null; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java index b782d7b..2f405da 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java @@ -22,16 +22,17 @@ package org.apache.sysml.runtime.matrix.data; public class FileFormatProperties { + private String description; public enum FileFormat { CSV, NATIVE }; FileFormat fmt; - FileFormatProperties() { + public FileFormatProperties() { fmt = FileFormat.NATIVE; } - FileFormatProperties(FileFormat fmt) { + public FileFormatProperties(FileFormat fmt) { this.fmt = fmt; } @@ -42,4 +43,13 @@ public class FileFormatProperties public FileFormat getFileFormat() { return fmt; } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java index 6f083f7..ff121b6 100644 --- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java +++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java @@ -25,7 +25,11 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; +import java.text.SimpleDateFormat; +import java.util.Date; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSDataInputStream; @@ -386,7 +390,7 @@ public class MapReduceTool BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); try { - String mtd = metaDataToString(mtdfile, vt, schema, dt, mc, outinfo, formatProperties); + String mtd = metaDataToString(vt, schema, dt, mc, outinfo, formatProperties); br.write(mtd); br.close(); } catch (Exception e) { @@ -402,8 +406,7 @@ public class MapReduceTool BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); try { - String mtd = metaDataToString(mtdfile, vt, null, - DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null); + String mtd = metaDataToString(vt, null, DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null); br.write(mtd); br.close(); } @@ -412,7 +415,7 @@ public class MapReduceTool } } - public static String metaDataToString(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, + public static String metaDataToString(ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException { OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file @@ -456,8 +459,24 @@ public class MapReduceTool mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, csvProperties.hasHeader()); mtd.put(DataExpression.DELIM_DELIMITER, csvProperties.getDelim()); } - mtd.put(DataExpression.DESCRIPTIONPARAM, - new OrderedJSONObject().put(DataExpression.AUTHORPARAM, "SystemML")); + + if (formatProperties != null) { + String description = formatProperties.getDescription(); + if (StringUtils.isNotEmpty(description)) { + String jsonDescription = StringEscapeUtils.escapeJson(description); + mtd.put(DataExpression.DESCRIPTIONPARAM, jsonDescription); + } + } + + String userName = System.getProperty("user.name"); + if (StringUtils.isNotEmpty(userName)) { + mtd.put(DataExpression.AUTHORPARAM, userName); + } else { + mtd.put(DataExpression.AUTHORPARAM, "SystemML"); + } + + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z"); + mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date())); return mtd.toString(4); // indent with 4 spaces }
