Repository: incubator-systemml
Updated Branches:
  refs/heads/master 578e595fd -> e2492fb61


[SYSTEMML-527] Write function description parameter

Add a description parameter to the write function so that a description can
be written to the metadata. Additionally, set the author to the user name
if available, and add a created timestamp.

Closes #364.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e2492fb6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e2492fb6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e2492fb6

Branch: refs/heads/master
Commit: e2492fb61665fe55a0ccee34116c10bf84f38fbf
Parents: 578e595
Author: Deron Eriksson <[email protected]>
Authored: Sat Feb 4 10:19:11 2017 -0800
Committer: Deron Eriksson <[email protected]>
Committed: Sat Feb 4 10:19:11 2017 -0800

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/lops/Data.java   | 14 ++++++++-
 .../org/apache/sysml/parser/DataExpression.java |  7 ++++-
 .../apache/sysml/parser/OutputStatement.java    |  3 +-
 .../instructions/cp/VariableCPInstruction.java  | 18 +++++++++---
 .../instructions/spark/WriteSPInstruction.java  | 15 +++++++++-
 .../matrix/data/FileFormatProperties.java       | 14 +++++++--
 .../sysml/runtime/util/MapReduceTool.java       | 31 ++++++++++++++++----
 7 files changed, 86 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/lops/Data.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Data.java 
b/src/main/java/org/apache/sysml/lops/Data.java
index 2cf125e..8e723c4 100644
--- a/src/main/java/org/apache/sysml/lops/Data.java
+++ b/src/main/java/org/apache/sysml/lops/Data.java
@@ -494,7 +494,19 @@ public class Data extends Lop
                        }
                        
                }
-               
+
+               if (operation == OperationTypes.WRITE) {
+                       sb.append(OPERAND_DELIMITOR);
+                       Lop descriptionLop = 
getInputParams().get(DataExpression.DESCRIPTIONPARAM);
+                       if (descriptionLop != null) {
+                               boolean descLiteral = (descriptionLop 
instanceof Data && ((Data) descriptionLop).isLiteral());
+                               
sb.append(prepOperand(descriptionLop.getOutputParameters().getLabel(), 
DataType.SCALAR,
+                                               ValueType.STRING, descLiteral));
+                       } else {
+                               sb.append(prepOperand("", DataType.SCALAR, 
ValueType.STRING, true));
+                       }
+               }
+
                return sb.toString();
        }
        

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/DataExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java 
b/src/main/java/org/apache/sysml/parser/DataExpression.java
index cd9a862..9370bdd 100644
--- a/src/main/java/org/apache/sysml/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysml/parser/DataExpression.java
@@ -78,6 +78,7 @@ public class DataExpression extends DataIdentifier
        public static final String DESCRIPTIONPARAM = "description";
        public static final String AUTHORPARAM = "author";
        public static final String SCHEMAPARAM = "schema";
+       public static final String CREATEDPARAM = "created";
 
        // Parameter names relevant to reading/writing delimited/csv files
        public static final String DELIM_DELIMITER = "sep";
@@ -100,6 +101,7 @@ public class DataExpression extends DataIdentifier
        public static final String[] READ_VALID_MTD_PARAM_NAMES = 
                { IO_FILENAME, READROWPARAM, READCOLPARAM, READNUMNONZEROPARAM, 
FORMAT_TYPE,
                        ROWBLOCKCOUNTPARAM, COLUMNBLOCKCOUNTPARAM, 
DATATYPEPARAM, VALUETYPEPARAM, SCHEMAPARAM, DESCRIPTIONPARAM,
+                       AUTHORPARAM, CREATEDPARAM,
                        // Parameters related to delimited/csv files.
                        DELIM_FILL_VALUE, DELIM_DELIMITER, DELIM_FILL, 
DELIM_HAS_HEADER_ROW, DELIM_NA_STRINGS
                }; 
@@ -1836,7 +1838,10 @@ public class DataExpression extends DataIdentifier
                        {
                                // if the read method does not specify 
parameter value, then add MTD metadata file value to parameter list
                                if (getVarParam(key.toString()) == null){
-                                       if ( 
!key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) {
+                                       if (( 
!key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) &&
+                                                       ( 
!key.toString().equalsIgnoreCase(AUTHORPARAM) ) &&
+                                                       ( 
!key.toString().equalsIgnoreCase(CREATEDPARAM) ) )
+                                       {
                                                StringIdentifier strId = new 
StringIdentifier(val.toString(),
                                                                
this.getFilename(), this.getBeginLine(), this.getBeginColumn(), 
                                                                
this.getEndLine(), this.getEndColumn());

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/parser/OutputStatement.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/OutputStatement.java 
b/src/main/java/org/apache/sysml/parser/OutputStatement.java
index d42432c..00859be 100644
--- a/src/main/java/org/apache/sysml/parser/OutputStatement.java
+++ b/src/main/java/org/apache/sysml/parser/OutputStatement.java
@@ -35,7 +35,8 @@ public class OutputStatement extends Statement
                                                                                
                                                DataExpression.FORMAT_TYPE, 
                                                                                
                                                DataExpression.DELIM_DELIMITER, 
                                                                                
                                                
DataExpression.DELIM_HAS_HEADER_ROW, 
-                                                                               
                                                DataExpression.DELIM_SPARSE};
+                                                                               
                                                DataExpression.DELIM_SPARSE,
+                                                                               
                                                
DataExpression.DESCRIPTIONPARAM};
 
        public DataIdentifier getIdentifier(){
                return _id;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
index d5ce3f7..78fe330 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
@@ -101,6 +101,7 @@ public class VariableCPInstruction extends CPInstruction
        private CPOperand input1;
        private CPOperand input2;
        private CPOperand input3;
+       private CPOperand input4;
        private CPOperand output;
        private MetaData metadata;
        private UpdateType _updateType;
@@ -274,15 +275,15 @@ public class VariableCPInstruction extends CPInstruction
                else if ( voc == VariableOperationCode.Write ) {
                        // All write instructions have 3 parameters, except in 
case of delimited/csv file.
                        // Write instructions for csv files also include three 
additional parameters (hasHeader, delimiter, sparse)
-                       if ( parts.length != 4 && parts.length != 7 )
-                               throw new DMLRuntimeException("Invalid number 
of operands in createvar instruction: " + str);
+                       if ( parts.length != 5 && parts.length != 8 )
+                               throw new DMLRuntimeException("Invalid number 
of operands in write instruction: " + str);
                }
                else {
                        _arity = getArity(voc);
                        InstructionUtils.checkNumFields ( parts, _arity ); // 
no output
                }
                
-               CPOperand in1=null, in2=null, in3=null, out=null;
+               CPOperand in1=null, in2=null, in3=null, in4=null, out=null;
                
                switch (voc) {
                
@@ -413,6 +414,13 @@ public class VariableCPInstruction extends CPInstruction
                                boolean sparse = Boolean.parseBoolean(parts[6]);
                                FileFormatProperties formatProperties = new 
CSVFileFormatProperties(hasHeader, delim, sparse);
                                inst.setFormatProperties(formatProperties);
+                               in4 = new CPOperand(parts[7]); // description
+                               inst.input4 = in4;
+                       } else {
+                               FileFormatProperties ffp = new 
FileFormatProperties();
+                               inst.setFormatProperties(ffp);
+                               in4 = new CPOperand(parts[4]); // description
+                               inst.input4 = in4;
                        }
                        return inst;
                        
@@ -745,6 +753,8 @@ public class VariableCPInstruction extends CPInstruction
        {
                //get filename (literal or variable expression)
                String fname = ec.getScalarInput(input2.getName(), 
ValueType.STRING, input2.isLiteral()).getStringValue();
+               String desc = ec.getScalarInput(input4.getName(), 
ValueType.STRING, input4.isLiteral()).getStringValue();
+               _formatProperties.setDescription(desc);
                
                if( input1.getDataType() == DataType.SCALAR ) {
                        writeScalarToHDFS(ec, fname);
@@ -758,7 +768,7 @@ public class VariableCPInstruction extends CPInstruction
                        else {
                                // Default behavior
                                MatrixObject mo = 
ec.getMatrixObject(input1.getName());
-                               mo.exportData(fname, outFmt);
+                               mo.exportData(fname, outFmt, _formatProperties);
                        }
                }
                else if( input1.getDataType() == DataType.FRAME ) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
index 431ff24..912dbe3 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/WriteSPInstruction.java
@@ -55,6 +55,7 @@ public class WriteSPInstruction extends SPInstruction
        private CPOperand input1 = null; 
        private CPOperand input2 = null;
        private CPOperand input3 = null;
+       private CPOperand input4 = null;
        private FileFormatProperties formatProperties;
        
        //scalars might occur for transform
@@ -82,7 +83,7 @@ public class WriteSPInstruction extends SPInstruction
                
                // All write instructions have 3 parameters, except in case of 
delimited/csv file.
                // Write instructions for csv files also include three 
additional parameters (hasHeader, delimiter, sparse)
-               if ( parts.length != 4 && parts.length != 8 ) {
+               if ( parts.length != 5 && parts.length != 9 ) {
                        throw new DMLRuntimeException("Invalid number of 
operands in write instruction: " + str);
                }
                
@@ -103,6 +104,15 @@ public class WriteSPInstruction extends SPInstruction
                        
                        boolean isInputMB = Boolean.parseBoolean(parts[7]);
                        inst.setInputMatrixBlock(isInputMB);
+
+                       CPOperand in4 = new CPOperand(parts[8]);
+                       inst.input4 = in4;
+               } else {
+                       FileFormatProperties ffp = new FileFormatProperties();
+
+                       CPOperand in4 = new CPOperand(parts[4]);
+                       inst.input4 = in4;
+                       inst.setFormatProperties(ffp);
                }
                return inst;            
        }
@@ -132,6 +142,9 @@ public class WriteSPInstruction extends SPInstruction
 
                //get filename (literal or variable expression)
                String fname = ec.getScalarInput(input2.getName(), 
ValueType.STRING, input2.isLiteral()).getStringValue();
+               String desc = ec.getScalarInput(input4.getName(), 
ValueType.STRING, input4.isLiteral()).getStringValue();
+               formatProperties.setDescription(desc);
+
                ValueType[] schema = (input1.getDataType()==DataType.FRAME) ? 
                                
sec.getFrameObject(input1.getName()).getSchema() : null;
                

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
index b782d7b..2f405da 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FileFormatProperties.java
@@ -22,16 +22,17 @@ package org.apache.sysml.runtime.matrix.data;
 public class FileFormatProperties 
 {
        
+       private String description;
        
        public enum FileFormat { CSV, NATIVE };
        
        FileFormat fmt;
        
-       FileFormatProperties() {
+       public FileFormatProperties() {
                fmt = FileFormat.NATIVE;
        }
        
-       FileFormatProperties(FileFormat fmt) {
+       public FileFormatProperties(FileFormat fmt) {
                this.fmt = fmt;
        }
        
@@ -42,4 +43,13 @@ public class FileFormatProperties
        public FileFormat getFileFormat() {
                return fmt;
        }
+
+       public String getDescription() {
+               return description;
+       }
+
+       public void setDescription(String description) {
+               this.description = description;
+       }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e2492fb6/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java 
b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
index 6f083f7..ff121b6 100644
--- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
+++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java
@@ -25,7 +25,11 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -386,7 +390,7 @@ public class MapReduceTool
                BufferedWriter br = new BufferedWriter(new 
OutputStreamWriter(fs.create(pt,true)));
 
                try {
-                       String mtd = metaDataToString(mtdfile, vt, schema, dt, 
mc, outinfo, formatProperties);
+                       String mtd = metaDataToString(vt, schema, dt, mc, 
outinfo, formatProperties);
                        br.write(mtd);
                        br.close();
                } catch (Exception e) {
@@ -402,8 +406,7 @@ public class MapReduceTool
                BufferedWriter br = new BufferedWriter(new 
OutputStreamWriter(fs.create(pt,true)));
 
                try {
-                       String mtd = metaDataToString(mtdfile, vt, null, 
-                               DataType.SCALAR, null, 
OutputInfo.TextCellOutputInfo, null);
+                       String mtd = metaDataToString(vt, null, 
DataType.SCALAR, null, OutputInfo.TextCellOutputInfo, null);
                        br.write(mtd);
                        br.close();
                } 
@@ -412,7 +415,7 @@ public class MapReduceTool
                }
        }
 
-       public static String metaDataToString(String mtdfile, ValueType vt, 
ValueType[] schema, DataType dt, MatrixCharacteristics mc, 
+       public static String metaDataToString(ValueType vt, ValueType[] schema, 
DataType dt, MatrixCharacteristics mc,
                        OutputInfo outinfo, FileFormatProperties 
formatProperties) throws JSONException, DMLRuntimeException
        {
                OrderedJSONObject mtd = new OrderedJSONObject(); // maintain 
order in output file
@@ -456,8 +459,24 @@ public class MapReduceTool
                        mtd.put(DataExpression.DELIM_HAS_HEADER_ROW, 
csvProperties.hasHeader());
                        mtd.put(DataExpression.DELIM_DELIMITER, 
csvProperties.getDelim());
                }
-               mtd.put(DataExpression.DESCRIPTIONPARAM,
-                       new OrderedJSONObject().put(DataExpression.AUTHORPARAM, 
"SystemML"));
+
+               if (formatProperties != null) {
+                       String description = formatProperties.getDescription();
+                       if (StringUtils.isNotEmpty(description)) {
+                               String jsonDescription = 
StringEscapeUtils.escapeJson(description);
+                               mtd.put(DataExpression.DESCRIPTIONPARAM, 
jsonDescription);
+                       }
+               }
+
+               String userName = System.getProperty("user.name");
+               if (StringUtils.isNotEmpty(userName)) {
+                       mtd.put(DataExpression.AUTHORPARAM, userName);
+               } else {
+                       mtd.put(DataExpression.AUTHORPARAM, "SystemML");
+               }
+
+               SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss z");
+               mtd.put(DataExpression.CREATEDPARAM, sdf.format(new Date()));
 
                return mtd.toString(4); // indent with 4 spaces 
        }

Reply via email to