Repository: incubator-systemml
Updated Branches:
  refs/heads/master ab45af17c -> 468e472a6


[SYSTEMML-569] Frame transform encode/decode/apply w/ column name specs

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/468e472a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/468e472a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/468e472a

Branch: refs/heads/master
Commit: 468e472a61d9ae7e1746ca1b48ad43e7ac4d79d8
Parents: ab45af1
Author: Matthias Boehm <[email protected]>
Authored: Sun Jul 10 21:28:57 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Mon Jul 11 11:25:03 2016 -0700

----------------------------------------------------------------------
 ...ReturnParameterizedBuiltinCPInstruction.java |   5 +-
 .../cp/ParameterizedBuiltinCPInstruction.java   |   7 +-
 ...ReturnParameterizedBuiltinSPInstruction.java |  14 ++-
 .../ParameterizedBuiltinSPInstruction.java      |  16 +--
 .../spark/utils/FrameRDDConverterUtils.java     |  20 ++--
 .../sysml/runtime/transform/BinAgent.java       |   8 +-
 .../sysml/runtime/transform/DummycodeAgent.java |   4 +-
 .../sysml/runtime/transform/MVImputeAgent.java  |   4 +-
 .../sysml/runtime/transform/OmitAgent.java      |   5 +-
 .../sysml/runtime/transform/RecodeAgent.java    |   6 +-
 .../apache/sysml/runtime/transform/TfUtils.java |  13 ++-
 .../transform/decode/DecoderFactory.java        |   6 +-
 .../transform/encode/EncoderFactory.java        |  30 +++---
 .../runtime/transform/meta/TfMetaUtils.java     |  86 ++++++++++-----
 .../functions/jmlc/FrameReadMetaTest.java       |   2 +-
 .../TransformFrameEncodeApplyTest.java          | 107 ++++++++++++++-----
 .../TransformFrameEncodeDecodeTest.java         |  44 ++++++--
 .../input/homes3/homes.tfspec_bin2.json         |   5 +
 .../input/homes3/homes.tfspec_dummy2.json       |   2 +
 .../input/homes3/homes.tfspec_impute2.json      |  10 ++
 .../input/homes3/homes.tfspec_omit2.json        |   2 +
 .../input/homes3/homes.tfspec_recode2.json      |   2 +
 22 files changed, 276 insertions(+), 122 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java
index af076f7..05cf19e 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java
@@ -20,6 +20,7 @@
 package org.apache.sysml.runtime.instructions.cp;
 
 import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
@@ -81,11 +82,13 @@ public class MultiReturnParameterizedBuiltinCPInstruction 
extends ComputationCPI
                //obtain and pin input frame
                FrameBlock fin = ec.getFrameInput(input1.getName());
                String spec = ec.getScalarInput(input2.getName(), 
input2.getValueType(), input2.isLiteral()).getStringValue();
+               List<String> colnames = fin.getColumnNames(); 
                
                //execute block transform encode
-               Encoder encoder = EncoderFactory.createEncoder(spec, 
fin.getNumColumns(), null);
+               Encoder encoder = EncoderFactory.createEncoder(spec, colnames, 
fin.getNumColumns(), null);
                MatrixBlock data = encoder.encode(fin, new 
MatrixBlock(fin.getNumRows(), fin.getNumColumns(), false)); //build and apply
                FrameBlock meta = encoder.getMetaData(new 
FrameBlock(fin.getNumColumns(), ValueType.STRING));
+               meta.setColumnNames(colnames);
                
                //release input and outputs
                ec.releaseFrameInput(input1.getName());

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
index eca1627..a245d10 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
@@ -20,6 +20,7 @@
 package org.apache.sysml.runtime.instructions.cp;
 
 import java.util.HashMap;
+import java.util.List;
 
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.parser.ParameterizedBuiltinFunctionExpression;
@@ -269,9 +270,10 @@ public class ParameterizedBuiltinCPInstruction extends 
ComputationCPInstruction
                        //acquire locks
                        FrameBlock data = 
ec.getFrameInput(params.get("target"));
                        FrameBlock meta = ec.getFrameInput(params.get("meta")); 
        
+                       List<String> colNames = data.getColumnNames();
                        
                        //compute transformapply
-                       Encoder encoder = 
EncoderFactory.createEncoder(params.get("spec"), data.getNumColumns(), meta);
+                       Encoder encoder = 
EncoderFactory.createEncoder(params.get("spec"), colNames, 
data.getNumColumns(), meta);
                        MatrixBlock mbout = encoder.apply(data, new 
MatrixBlock(data.getNumRows(), data.getNumColumns(), false));
                        
                        //release locks
@@ -283,9 +285,10 @@ public class ParameterizedBuiltinCPInstruction extends 
ComputationCPInstruction
                        //acquire locks
                        MatrixBlock data = 
ec.getMatrixInput(params.get("target"));
                        FrameBlock meta = ec.getFrameInput(params.get("meta"));
+                       List<String> colnames = meta.getColumnNames();
                        
                        //compute transformdecode
-                       Decoder decoder = 
DecoderFactory.createDecoder(getParameterMap().get("spec"), null, meta);
+                       Decoder decoder = 
DecoderFactory.createDecoder(getParameterMap().get("spec"), colnames, null, 
meta);
                        FrameBlock fbout = decoder.decode(data, new 
FrameBlock(meta.getNumColumns(), ValueType.STRING));
                        
                        //release locks

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java
index a53f673..9b35daa 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java
@@ -24,6 +24,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map.Entry;
 
 import org.apache.spark.Accumulator;
@@ -126,9 +127,11 @@ public class MultiReturnParameterizedBuiltinSPInstruction 
extends ComputationSPI
                        String spec = ec.getScalarInput(input2.getName(), 
input2.getValueType(), input2.isLiteral()).getStringValue();
                        MatrixCharacteristics mcIn = 
sec.getMatrixCharacteristics(input1.getName());
                        MatrixCharacteristics mcOut = 
sec.getMatrixCharacteristics(output.getName());
-                       
+                       List<String> colnames = 
!TfMetaUtils.isIDSpecification(spec) ?
+                                       in.lookup(1L).get(0).getColumnNames() : 
null; 
+                                       
                        //step 1: build transform meta data
-                       Encoder encoderBuild = 
EncoderFactory.createEncoder(spec, 
+                       Encoder encoderBuild = 
EncoderFactory.createEncoder(spec, colnames,
                                        fo.getSchema(), 
(int)fo.getNumColumns(), null);
                        
                        Accumulator<Long> accMax = 
sec.getSparkContext().accumulator(0L, new MaxAcc()); 
@@ -148,17 +151,18 @@ public class MultiReturnParameterizedBuiltinSPInstruction 
extends ComputationSPI
                        FrameReader reader = 
FrameReaderFactory.createFrameReader(InputInfo.TextCellInputInfo);
                        FrameBlock meta = 
reader.readFrameFromHDFS(fometa.getFileName(), accMax.value(), 
fo.getNumColumns());
                        meta.recomputeColumnCardinality(); //recompute num 
distinct items per column
+                       
meta.setColumnNames((colnames!=null)?colnames:meta.getColumnNames());
                        
                        //step 2: transform apply (similar to spark 
transformapply)
                        //compute omit offset map for block shifts
                        TfOffsetMap omap = null;
-                       if( TfMetaUtils.containsOmitSpec(spec) ) {
+                       if( TfMetaUtils.containsOmitSpec(spec, colnames) ) {
                                omap = new 
TfOffsetMap(SparkUtils.toIndexedLong(in.mapToPair(
-                                       new 
RDDTransformApplyOffsetFunction(spec)).collect()));
+                                       new 
RDDTransformApplyOffsetFunction(spec, colnames)).collect()));
                        }
                                
                        //create encoder broadcast (avoiding replication per 
task) 
-                       Encoder encoder = EncoderFactory.createEncoder(spec, 
+                       Encoder encoder = EncoderFactory.createEncoder(spec, 
colnames,
                                        fo.getSchema(), 
(int)fo.getNumColumns(), meta);
                        
mcOut.setDimension(mcIn.getRows()-((omap!=null)?omap.getNumRmRows():0), 
encoder.getNumCols()); 
                        Broadcast<Encoder> bmeta = 
sec.getSparkContext().broadcast(encoder);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
index 25a4078..f880f57 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
@@ -21,6 +21,7 @@ package org.apache.sysml.runtime.instructions.spark;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.function.Function;
@@ -428,16 +429,18 @@ public class ParameterizedBuiltinSPInstruction  extends 
ComputationSPInstruction
                        FrameBlock meta = 
sec.getFrameInput(params.get("meta"));                
                        MatrixCharacteristics mcIn = 
sec.getMatrixCharacteristics(params.get("target"));
                        MatrixCharacteristics mcOut = 
sec.getMatrixCharacteristics(output.getName());
+                       List<String> colnames = 
!TfMetaUtils.isIDSpecification(params.get("spec")) ?
+                                       in.lookup(1L).get(0).getColumnNames() : 
null; 
                        
                        //compute omit offset map for block shifts
                        TfOffsetMap omap = null;
-                       if( TfMetaUtils.containsOmitSpec(params.get("spec")) ) {
+                       if( TfMetaUtils.containsOmitSpec(params.get("spec"), 
colnames) ) {
                                omap = new 
TfOffsetMap(SparkUtils.toIndexedLong(in.mapToPair(
-                                       new 
RDDTransformApplyOffsetFunction(params.get("spec"))).collect()));
+                                       new 
RDDTransformApplyOffsetFunction(params.get("spec"), colnames)).collect()));
                        }
                                
                        //create encoder broadcast (avoiding replication per 
task) 
-                       Encoder encoder = 
EncoderFactory.createEncoder(params.get("spec"), 
+                       Encoder encoder = 
EncoderFactory.createEncoder(params.get("spec"), colnames,
                                        fo.getSchema(), 
(int)fo.getNumColumns(), meta);
                        
mcOut.setDimension(mcIn.getRows()-((omap!=null)?omap.getNumRmRows():0), 
encoder.getNumCols()); 
                        Broadcast<Encoder> bmeta = 
sec.getSparkContext().broadcast(encoder);
@@ -460,6 +463,7 @@ public class ParameterizedBuiltinSPInstruction  extends 
ComputationSPInstruction
                        JavaPairRDD<MatrixIndexes,MatrixBlock> in = 
sec.getBinaryBlockRDDHandleForVariable(params.get("target"));
                        MatrixCharacteristics mc = 
sec.getMatrixCharacteristics(params.get("target"));
                        FrameBlock meta = 
sec.getFrameInput(params.get("meta"));                
+                       List<String> colnames = meta.getColumnNames();
                        
                        //reblock if necessary (clen > bclen)
                        if( mc.getCols() > mc.getNumColBlocks() ) {
@@ -469,7 +473,7 @@ public class ParameterizedBuiltinSPInstruction  extends 
ComputationSPInstruction
                        }
                        
                        //construct decoder and decode individual matrix blocks
-                       Decoder decoder = 
DecoderFactory.createDecoder(params.get("spec"), null, meta);
+                       Decoder decoder = 
DecoderFactory.createDecoder(params.get("spec"), colnames, null, meta);
                        JavaPairRDD<Long,FrameBlock> out = in.mapToPair(
                                        new RDDTransformDecodeFunction(decoder, 
meta.getNumColumns(), mc.getRowsPerBlock()));
                        
@@ -769,9 +773,9 @@ public class ParameterizedBuiltinSPInstruction  extends 
ComputationSPInstruction
                
                private int[] _omitColList = null;
                
-               public RDDTransformApplyOffsetFunction(String spec) {
+               public RDDTransformApplyOffsetFunction(String spec, 
List<String> colnames) {
                        try {
-                               _omitColList = 
TfMetaUtils.parseJsonIDList(spec, TfUtils.TXMETHOD_OMIT);
+                               _omitColList = 
TfMetaUtils.parseJsonIDList(spec, colnames, TfUtils.TXMETHOD_OMIT);
                        } 
                        catch (DMLRuntimeException e) {
                                throw new RuntimeException(e);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index b63724a..c640d4d 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Arrays;
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
@@ -394,10 +395,9 @@ public class FrameRDDConverterUtils
                private String _delim = null;
                private boolean _fill = false;
                private int _maxRowsPerBlock = -1; 
+               private List<String> _colnames = null;
                
-               
-               public CSVToBinaryBlockFunction(MatrixCharacteristics mc, 
boolean hasHeader, String delim, boolean fill)
-               {
+               public CSVToBinaryBlockFunction(MatrixCharacteristics mc, 
boolean hasHeader, String delim, boolean fill) {
                        _clen = mc.getCols();
                        _hasHeader = hasHeader;
                        _delim = delim;
@@ -420,10 +420,12 @@ public class FrameRDDConverterUtils
                                Tuple2<Text,Long> tmp = arg0.next();
                                String row = tmp._1().toString();
                                long rowix = tmp._2();
-                               if(!_hasHeader)         // In case there is no 
header, rowindex to be adjusted to base 1.
-                                       ++rowix;
-                               if(_hasHeader && rowix == 0)    //Skip header
+                               if(!_hasHeader) // In case there is no header, 
rowindex to be adjusted to base 1.
+                                       rowix++;
+                               if(_hasHeader && rowix == 0) { //Skip header
+                                       _colnames = 
Arrays.asList(row.split(_delim));
                                        continue;
+                               }
                        
                                if( iRowsInBlock == 0 || iRowsInBlock == 
_maxRowsPerBlock) {
                                        if( iRowsInBlock == _maxRowsPerBlock )
@@ -436,7 +438,7 @@ public class FrameRDDConverterUtils
                                String[] parts = IOUtilFunctions.split(row, 
_delim);
                                boolean emptyFound = false;
                                mb[0].appendRow(parts);
-                               ++iRowsInBlock;
+                               iRowsInBlock++;
                
                                //sanity check empty cells filled w/ values
                                
IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(row, _fill, emptyFound);
@@ -453,7 +455,9 @@ public class FrameRDDConverterUtils
                {
                        //compute row block index and number of column blocks
                        ix[0] = new LongWritable(rowix);
-                       mb[0] = new FrameBlock((int)_clen, ValueType.STRING);   
        
+                       mb[0] = new FrameBlock((int)_clen, ValueType.STRING);
+                       if( _colnames != null )
+                               mb[0].setColumnNames(_colnames);
                }
                
                // Flushes current state of filled column blocks to output list.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java 
b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
index ad7cbfc..ee0e56a 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java
@@ -68,10 +68,10 @@ public class BinAgent extends Encoder
                super( null, clen );
        }
        
-       public BinAgent(JSONObject parsedSpec, int clen) 
+       public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen) 
                throws JSONException, IOException 
        {
-               this(parsedSpec, clen, false);
+               this(parsedSpec, colnames, clen, false);
        }
        
        /**
@@ -81,7 +81,7 @@ public class BinAgent extends Encoder
         * @throws JSONException
         * @throws IOException 
         */
-       public BinAgent(JSONObject parsedSpec, int clen, boolean colsOnly) 
+       public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen, 
boolean colsOnly) 
                throws JSONException, IOException 
        {
                super( null, clen );            
@@ -89,7 +89,7 @@ public class BinAgent extends Encoder
                        return;
                
                if( colsOnly ) {
-                       List<Integer> collist = 
TfMetaUtils.parseBinningColIDs(parsedSpec);
+                       List<Integer> collist = 
TfMetaUtils.parseBinningColIDs(parsedSpec, colnames);
                        initColList(ArrayUtils.toPrimitive(collist.toArray(new 
Integer[0])));
                }
                else 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java 
b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
index aeeb62b..b51d639 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java
@@ -64,11 +64,11 @@ public class DummycodeAgent extends Encoder
                super(list, clen);
        }
        
-       public DummycodeAgent(JSONObject parsedSpec, int clen) throws 
JSONException {
+       public DummycodeAgent(JSONObject parsedSpec, List<String> colnames, int 
clen) throws JSONException {
                super(null, clen);
                
                if ( parsedSpec.containsKey(TfUtils.TXMETHOD_DUMMYCODE) ) {
-                       int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
TfUtils.TXMETHOD_DUMMYCODE);
+                       int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
colnames, TfUtils.TXMETHOD_DUMMYCODE);
                        initColList(collist);
                }
        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java 
b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
index 68896ac..1266ced 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java
@@ -98,13 +98,13 @@ public class MVImputeAgent extends Encoder
        public KahanObject[] getMeans_scnomv()   { return _scnomvMeanList; }
        public CM_COV_Object[] getVars_scnomv()  { return _scnomvVarList; }
        
-       public MVImputeAgent(JSONObject parsedSpec, int clen) 
+       public MVImputeAgent(JSONObject parsedSpec, List<String> colnames, int 
clen) 
                throws JSONException
        {
                super(null, clen);
                
                //handle column list
-               int[] collist = TfMetaUtils.parseJsonObjectIDList(parsedSpec, 
TfUtils.TXMETHOD_IMPUTE);
+               int[] collist = TfMetaUtils.parseJsonObjectIDList(parsedSpec, 
colnames, TfUtils.TXMETHOD_IMPUTE);
                initColList(collist);
        
                //handle method list

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java 
b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
index 186ac7a..de6d59f 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java
@@ -21,6 +21,7 @@ package org.apache.sysml.runtime.transform;
 
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -49,13 +50,13 @@ public class OmitAgent extends Encoder
                super(list, clen);
        }
        
-       public OmitAgent(JSONObject parsedSpec, int clen) 
+       public OmitAgent(JSONObject parsedSpec, List<String> colnames, int 
clen) 
                throws JSONException 
        {
                super(null, clen);
                if (!parsedSpec.containsKey(TfUtils.TXMETHOD_OMIT))
                        return;
-               int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
TfUtils.TXMETHOD_OMIT);
+               int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
colnames, TfUtils.TXMETHOD_OMIT);
                initColList(collist);
        }
        

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java 
b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
index 5abe9db..edfdff4 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java
@@ -59,19 +59,19 @@ public class RecodeAgent extends Encoder
        private HashMap<Integer, HashMap<String, Long>> _rcdMaps  = new 
HashMap<Integer, HashMap<String, Long>>();
        private HashMap<Integer, HashMap<String,String>> _finalMaps = null;
        
-       public RecodeAgent(JSONObject parsedSpec, int clen)
+       public RecodeAgent(JSONObject parsedSpec, List<String> colnames, int 
clen)
                throws JSONException 
        {
                super(null, clen);
                int rcdCount = 0;
                
                if( parsedSpec.containsKey(TfUtils.TXMETHOD_RECODE) ) {
-                       int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
TfUtils.TXMETHOD_RECODE);
+                       int[] collist = TfMetaUtils.parseJsonIDList(parsedSpec, 
colnames, TfUtils.TXMETHOD_RECODE);
                        rcdCount = initColList(collist);
                }
                
                if ( parsedSpec.containsKey(TfUtils.TXMETHOD_MVRCD)) {
-                       _mvrcdList = TfMetaUtils.parseJsonIDList(parsedSpec, 
TfUtils.TXMETHOD_MVRCD);
+                       _mvrcdList = TfMetaUtils.parseJsonIDList(parsedSpec, 
colnames, TfUtils.TXMETHOD_MVRCD);
                        rcdCount += _mvrcdList.length;
                }
                

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java 
b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
index 24bde22..2b63797 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java
@@ -23,6 +23,7 @@ import java.io.EOFException;
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.Arrays;
+import java.util.List;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.filecache.DistributedCache;
@@ -135,7 +136,7 @@ public class TfUtils implements Serializable{
                }               
                _NAstrings = TfUtils.parseNAStrings(job);
                _spec = job.get(MRJobConfiguration.TF_SPEC);
-               _oa = new OmitAgent(new JSONObject(_spec), -1);
+               _oa = new OmitAgent(new JSONObject(_spec), null, -1);
        }
        
        // called from GenTFMtdMapper, ApplyTf (Hadoop)
@@ -244,11 +245,13 @@ public class TfUtils implements Serializable{
        private void createAgents(JSONObject spec, String[] naStrings) 
                throws IOException, JSONException 
        {
-               _oa = new OmitAgent(spec, _numInputCols);
+               List<String> colnames = Arrays.asList(_outputColumnNames);
+               
+               _oa = new OmitAgent(spec, colnames, _numInputCols);
                _mia = new MVImputeAgent(spec, naStrings, _numInputCols);
-               _ra = new RecodeAgent(spec, _numInputCols);
-               _ba = new BinAgent(spec, _numInputCols);
-               _da = new DummycodeAgent(spec, _numInputCols);
+               _ra = new RecodeAgent(spec, colnames, _numInputCols);
+               _ba = new BinAgent(spec, colnames, _numInputCols);
+               _da = new DummycodeAgent(spec, colnames, _numInputCols);
        }
        
        public void setupAgents(OmitAgent oa, MVImputeAgent mia, RecodeAgent 
ra, BinAgent ba, DummycodeAgent da)  {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java 
b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
index 78f1ad2..f276015 100644
--- 
a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
+++ 
b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java
@@ -46,7 +46,7 @@ public class DecoderFactory
         * @throws DMLRuntimeException
         */
        @SuppressWarnings("unchecked")
-       public static Decoder createDecoder(String spec, List<ValueType> 
schema, FrameBlock meta) 
+       public static Decoder createDecoder(String spec, List<String> colnames, 
List<ValueType> schema, FrameBlock meta) 
                throws DMLRuntimeException 
        {       
                Decoder decoder = null;
@@ -64,9 +64,9 @@ public class DecoderFactory
                        
                        //create decoders 'recode', 'dummy' and 'pass-through'
                        List<Integer> rcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_RECODE)));
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_RECODE)));
                        List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_DUMMYCODE))); 
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_DUMMYCODE))); 
                        rcIDs = new 
ArrayList<Integer>(CollectionUtils.union(rcIDs, dcIDs));
                        List<Integer> ptIDs = new 
ArrayList<Integer>(CollectionUtils
                                        
.subtract(UtilFunctions.getSequenceList(1, schema.size(), 1), rcIDs)); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java 
b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
index ae98bae..8adea7b 100644
--- 
a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
+++ 
b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java
@@ -48,8 +48,8 @@ public class EncoderFactory
         * @return
         * @throws DMLRuntimeException 
         */
-       public static Encoder createEncoder(String spec, int clen, FrameBlock 
meta) throws DMLRuntimeException {
-               return createEncoder(spec, Collections.nCopies(clen, 
ValueType.STRING), meta);
+       public static Encoder createEncoder(String spec, List<String> colnames, 
int clen, FrameBlock meta) throws DMLRuntimeException {
+               return createEncoder(spec, colnames, Collections.nCopies(clen, 
ValueType.STRING), meta);
        }
        
        /**
@@ -61,9 +61,9 @@ public class EncoderFactory
         * @return
         * @throws DMLRuntimeException
         */
-       public static Encoder createEncoder(String spec, List<ValueType> 
schema, int clen, FrameBlock meta) throws DMLRuntimeException {
+       public static Encoder createEncoder(String spec, List<String> colnames, 
List<ValueType> schema, int clen, FrameBlock meta) throws DMLRuntimeException {
                List<ValueType> lschema = (schema==null) ? 
Collections.nCopies(clen, ValueType.STRING) : schema;
-               return createEncoder(spec, lschema, meta);
+               return createEncoder(spec, colnames, lschema, meta);
        }
        
        
@@ -75,7 +75,7 @@ public class EncoderFactory
         * @throws DMLRuntimeException
         */
        @SuppressWarnings("unchecked")
-       public static Encoder createEncoder(String spec, List<ValueType> 
schema, FrameBlock meta) 
+       public static Encoder createEncoder(String spec, List<String> 
colnames, List<ValueType> schema, FrameBlock meta) 
                throws DMLRuntimeException 
        {       
                Encoder encoder = null;
@@ -89,21 +89,21 @@ public class EncoderFactory
                        //prepare basic id lists (recode, dummycode, 
pass-through)
                        //note: any dummycode column requires recode as 
preparation
                        List<Integer> rcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_RECODE)));
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_RECODE)));
                        List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_DUMMYCODE))); 
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_DUMMYCODE))); 
                        rcIDs = new 
ArrayList<Integer>(CollectionUtils.union(rcIDs, dcIDs));
-                       List<Integer> binIDs = 
TfMetaUtils.parseBinningColIDs(jSpec); 
+                       List<Integer> binIDs = 
TfMetaUtils.parseBinningColIDs(jSpec, colnames); 
                        List<Integer> ptIDs = new 
ArrayList<Integer>(CollectionUtils.subtract(
                                        
CollectionUtils.subtract(UtilFunctions.getSequenceList(1, clen, 1), rcIDs), 
binIDs)); 
                        List<Integer> oIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_OMIT))); 
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_OMIT))); 
                        List<Integer> mvIDs = Arrays.asList(ArrayUtils.toObject(
-                                       
TfMetaUtils.parseJsonObjectIDList(jSpec, TfUtils.TXMETHOD_IMPUTE))); 
+                                       
TfMetaUtils.parseJsonObjectIDList(jSpec, colnames, TfUtils.TXMETHOD_IMPUTE))); 
                        
                        //create individual encoders
                        if( !rcIDs.isEmpty() ) {
-                               RecodeAgent ra = new RecodeAgent(jSpec, clen);
+                               RecodeAgent ra = new RecodeAgent(jSpec, 
colnames, clen);
                                
ra.setColList(ArrayUtils.toPrimitive(rcIDs.toArray(new Integer[0])));
                                lencoders.add(ra);      
                        }
@@ -111,13 +111,13 @@ public class EncoderFactory
                                lencoders.add(new EncoderPassThrough(
                                                
ArrayUtils.toPrimitive(ptIDs.toArray(new Integer[0])), clen));  
                        if( !dcIDs.isEmpty() )
-                               lencoders.add(new DummycodeAgent(jSpec, 
schema.size()));
+                               lencoders.add(new DummycodeAgent(jSpec, 
colnames, schema.size()));
                        if( !binIDs.isEmpty() )
-                               lencoders.add(new BinAgent(jSpec, 
schema.size(), true));
+                               lencoders.add(new BinAgent(jSpec, colnames, 
schema.size(), true));
                        if( !oIDs.isEmpty() )
-                               lencoders.add(new OmitAgent(jSpec, 
schema.size()));
+                               lencoders.add(new OmitAgent(jSpec, colnames, 
schema.size()));
                        if( !mvIDs.isEmpty() ) {
-                               MVImputeAgent ma = new MVImputeAgent(jSpec, 
schema.size());
+                               MVImputeAgent ma = new MVImputeAgent(jSpec, 
colnames, schema.size());
                                ma.initRecodeIDList(rcIDs);
                                lencoders.add(ma);
                        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java 
b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
index 82396f6..de883f3 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java
@@ -57,23 +57,41 @@ public class TfMetaUtils
         * @return
         * @throws DMLRuntimeException
         */
-       public static boolean containsOmitSpec(String spec) throws 
DMLRuntimeException {
-               return (TfMetaUtils.parseJsonIDList(spec, 
TfUtils.TXMETHOD_OMIT).length > 0);   
+       public static boolean isIDSpecification(String spec) throws 
DMLRuntimeException {
+               try {
+                       JSONObject jSpec = new JSONObject(spec);
+                       return jSpec.containsKey("ids") && 
jSpec.getBoolean("ids");
+               }
+               catch(JSONException ex) {
+                       throw new DMLRuntimeException(ex);
+               }
+       }
+       
+       /**
+        * 
+        * @param spec
+        * @param colnames
+        * @return
+        * @throws DMLRuntimeException
+        */
+       public static boolean containsOmitSpec(String spec, List<String> 
colnames) throws DMLRuntimeException {
+               return (TfMetaUtils.parseJsonIDList(spec, colnames, 
TfUtils.TXMETHOD_OMIT).length > 0); 
        }
        
        /**
         * 
         * @param spec
+        * @param colnames
         * @param group
         * @return
-        * @throws JSONException
+        * @throws DMLRuntimeException
         */
-       public static int[] parseJsonIDList(String spec, String group) 
+       public static int[] parseJsonIDList(String spec, List<String> colnames, 
String group) 
                throws DMLRuntimeException 
        {
                try {
                        JSONObject jSpec = new JSONObject(spec);
-                       return parseJsonIDList(jSpec, group);
+                       return parseJsonIDList(jSpec, colnames, group);
                }
                catch(JSONException ex) {
                        throw new DMLRuntimeException(ex);
@@ -82,29 +100,39 @@ public class TfMetaUtils
        
        /**
         * TODO consolidate external and internal json spec definitions
-        *              
-        * @param parsedSpec
+        * 
+        * @param spec
+        * @param colnames
         * @param group
         * @return
-        * @throws JSONException 
+        * @throws JSONException
         */
-       public static int[] parseJsonIDList(JSONObject spec, String group) 
+       public static int[] parseJsonIDList(JSONObject spec, List<String> 
colnames, String group) 
                throws JSONException
        {
                int[] colList = new int[0];
+               boolean ids = spec.containsKey("ids") && spec.getBoolean("ids");
                
                if( spec.containsKey(group) ) {
                        //parse attribute-array or plain array of IDs
                        JSONArray attrs = null;
-                       if( spec.get(group) instanceof JSONObject )
+                       if( spec.get(group) instanceof JSONObject ) {
                                attrs = (JSONArray) 
((JSONObject)spec.get(group)).get(TfUtils.JSON_ATTRS);
+                               ids = true; //file-based transform outputs ids 
w/o id tags
+                       }
                        else
                                attrs = (JSONArray)spec.get(group);             
        
                        
                        //construct ID list array
                        colList = new int[attrs.size()];
-                       for(int i=0; i < colList.length; i++) 
-                               colList[i] = UtilFunctions.toInt(attrs.get(i));
+                       for(int i=0; i < colList.length; i++) {
+                               colList[i] = ids ? 
UtilFunctions.toInt(attrs.get(i)) : 
+                                       (colnames.indexOf(attrs.get(i)) + 1);
+                               if( colList[i] <= 0 ) {
+                                       throw new RuntimeException("Specified 
column '" + 
+                                               attrs.get(i)+"' does not 
exist.");
+                               }
+                       }
                
                        //ensure ascending order of column IDs
                        Arrays.sort(colList);
@@ -120,10 +148,11 @@ public class TfMetaUtils
         * @return
         * @throws JSONException
         */
-       public static int[] parseJsonObjectIDList(JSONObject spec, String 
group) 
+       public static int[] parseJsonObjectIDList(JSONObject spec, List<String> 
colnames, String group) 
                throws JSONException
        {
                int[] colList = new int[0];
+               boolean ids = spec.containsKey("ids") && spec.getBoolean("ids");
                
                if( spec.containsKey(group) && spec.get(group) instanceof 
JSONArray )
                {
@@ -131,7 +160,12 @@ public class TfMetaUtils
                        colList = new int[colspecs.size()];
                        for(int j=0; j<colspecs.size(); j++) {
                                JSONObject colspec = (JSONObject) 
colspecs.get(j);
-                               colList[j] = colspec.getInt("id");
+                               colList[j] = ids ? colspec.getInt("id") : 
+                                       (colnames.indexOf(colspec.get("name")) 
+ 1);
+                               if( colList[j] <= 0 ) {
+                                       throw new RuntimeException("Specified 
column '" +
+                                               colspec.get(ids?"id":"name")+"' 
does not exist.");
+                               }
                        }
                        
                        //ensure ascending order of column IDs
@@ -188,8 +222,8 @@ public class TfMetaUtils
                }
 
                //get list of recode ids
-               List<Integer> recodeIDs = parseRecodeColIDs(spec);
-               List<Integer> binIDs = parseBinningColIDs(spec);
+               List<Integer> recodeIDs = parseRecodeColIDs(spec, colnames);
+               List<Integer> binIDs = parseBinningColIDs(spec, colnames);
                
                //create frame block from in-memory strings
                return convertToTransformMetaDataFrame(rows, colnames, 
recodeIDs, binIDs, meta, mvmeta);
@@ -243,8 +277,8 @@ public class TfMetaUtils
                }
                
                //get list of recode ids
-               List<Integer> recodeIDs = parseRecodeColIDs(spec);
-               List<Integer> binIDs = parseBinningColIDs(spec);
+               List<Integer> recodeIDs = parseRecodeColIDs(spec, colnames);
+               List<Integer> binIDs = parseBinningColIDs(spec, colnames);
                
                //create frame block from in-memory strings
                return convertToTransformMetaDataFrame(rows, colnames, 
recodeIDs, binIDs, meta, mvmeta);
@@ -327,7 +361,7 @@ public class TfMetaUtils
         * @throws IOException
         */
        @SuppressWarnings("unchecked")
-       private static List<Integer> parseRecodeColIDs(String spec) 
+       private static List<Integer> parseRecodeColIDs(String spec, 
List<String> colnames) 
                throws IOException 
        {       
                if( spec == null )
@@ -339,9 +373,9 @@ public class TfMetaUtils
                        //parse json transform specification for recode col ids
                        JSONObject jSpec = new JSONObject(spec);
                        List<Integer> rcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_RECODE)));
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_RECODE)));
                        List<Integer> dcIDs = Arrays.asList(ArrayUtils.toObject(
-                                       TfMetaUtils.parseJsonIDList(jSpec, 
TfUtils.TXMETHOD_DUMMYCODE))); 
+                                       TfMetaUtils.parseJsonIDList(jSpec, 
colnames, TfUtils.TXMETHOD_DUMMYCODE))); 
                        specRecodeIDs = new 
ArrayList<Integer>(CollectionUtils.union(rcIDs, dcIDs));
                }
                catch(Exception ex) {
@@ -357,12 +391,12 @@ public class TfMetaUtils
         * @return
         * @throws IOException
         */
-       public static List<Integer> parseBinningColIDs(String spec) 
+       public static List<Integer> parseBinningColIDs(String spec, 
List<String> colnames) 
                throws IOException 
        {
                try {
                        JSONObject jSpec = new JSONObject(spec);
-                       return parseBinningColIDs(jSpec);
+                       return parseBinningColIDs(jSpec, colnames);
                }
                catch(JSONException ex) {
                        throw new IOException(ex);
@@ -375,17 +409,17 @@ public class TfMetaUtils
         * @return
         * @throws IOException
         */
-       public static List<Integer> parseBinningColIDs(JSONObject jSpec) 
+       public static List<Integer> parseBinningColIDs(JSONObject jSpec, 
List<String> colnames) 
                throws IOException 
        {
                try {
                        if( jSpec.containsKey(TfUtils.TXMETHOD_BIN) && 
jSpec.get(TfUtils.TXMETHOD_BIN) instanceof JSONArray ) {
                                return Arrays.asList(ArrayUtils.toObject(
-                                               
TfMetaUtils.parseJsonObjectIDList(jSpec, TfUtils.TXMETHOD_BIN)));       
+                                               
TfMetaUtils.parseJsonObjectIDList(jSpec, colnames, TfUtils.TXMETHOD_BIN)));     
                        }
                        else { //internally generates
                                return Arrays.asList(ArrayUtils.toObject(
-                                               
TfMetaUtils.parseJsonIDList(jSpec, TfUtils.TXMETHOD_BIN)));     
+                                               
TfMetaUtils.parseJsonIDList(jSpec, colnames, TfUtils.TXMETHOD_BIN)));   
                        }
                }
                catch(JSONException ex) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
index 3fad934..b0eb3ce 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameReadMetaTest.java
@@ -179,7 +179,7 @@ public class FrameReadMetaTest extends AutomatedTestBase
                throws DMLRuntimeException 
        {
                List<Integer> collist = Arrays.asList(ArrayUtils.toObject(
-                               TfMetaUtils.parseJsonIDList(spec, 
TfUtils.TXMETHOD_RECODE)));
+                               TfMetaUtils.parseJsonIDList(spec, 
M.getColumnNames(), TfUtils.TXMETHOD_RECODE)));
                HashMap<String,Long>[] ret = new HashMap[M.getNumColumns()];
                Iterator<Object[]> iter = M.getObjectRowIterator();
                while( iter.hasNext() ) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
index b61060b..27d58f9 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
@@ -39,13 +39,18 @@ public class TransformFrameEncodeApplyTest extends 
AutomatedTestBase
        //dataset and transform tasks without missing values
        private final static String DATASET1    = "homes3/homes.csv";
        private final static String SPEC1               = 
"homes3/homes.tfspec_recode.json"; 
+       private final static String SPEC1b              = 
"homes3/homes.tfspec_recode2.json"; 
        private final static String SPEC2               = 
"homes3/homes.tfspec_dummy.json";
+       private final static String SPEC2b              = 
"homes3/homes.tfspec_dummy2.json";
        private final static String SPEC3               = 
"homes3/homes.tfspec_bin.json"; //incl recode
+       private final static String SPEC3b              = 
"homes3/homes.tfspec_bin2.json"; //incl recode
        
        //dataset and transform tasks with missing values
        private final static String DATASET2    = "homes/homes.csv";
        private final static String SPEC4               = 
"homes3/homes.tfspec_impute.json";
+       private final static String SPEC4b              = 
"homes3/homes.tfspec_impute2.json";
        private final static String SPEC5               = 
"homes3/homes.tfspec_omit.json";
+       private final static String SPEC5b              = 
"homes3/homes.tfspec_omit2.json";
        
        public enum TransformType {
                RECODE,
@@ -62,62 +67,112 @@ public class TransformFrameEncodeApplyTest extends 
AutomatedTestBase
        }
        
        @Test
-       public void testHomesRecodeSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE);
+       public void testHomesRecodeIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE, false);
        }
        
        @Test
-       public void testHomesRecodeSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE);
+       public void testHomesRecodeIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE, false);
        }
        
        @Test
-       public void testHomesDummycodeSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY);
+       public void testHomesDummycodeIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY, false);
        }
        
        @Test
-       public void testHomesDummycodeSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY);
+       public void testHomesDummycodeIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY, false);
        }
        
        @Test
-       public void testHomesBinningSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.BIN);
+       public void testHomesBinningIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.BIN, false);
        }
        
        @Test
-       public void testHomesBinningSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.BIN);
+       public void testHomesBinningIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.BIN, false);
        }
        
        @Test
-       public void testHomesOmitSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.OMIT);
+       public void testHomesOmitIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.OMIT, false);
        }
        
        @Test
-       public void testHomesOmitSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.OMIT);
+       public void testHomesOmitIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.OMIT, false);
        }
        
        @Test
-       public void testHomesImputeSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.IMPUTE);
+       public void testHomesImputeIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.IMPUTE, false);
        }
        
        @Test
-       public void testHomesImputeSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.IMPUTE);
+       public void testHomesImputeIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.IMPUTE, false);
        }
 
+       @Test
+       public void testHomesRecodeColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE, true);
+       }
+       
+       @Test
+       public void testHomesRecodeColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE, true);
+       }
+       
+       @Test
+       public void testHomesDummycodeColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY, true);
+       }
+       
+       @Test
+       public void testHomesDummycodeColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY, true);
+       }
+       
+       @Test
+       public void testHomesBinningColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.BIN, true);
+       }
+       
+       @Test
+       public void testHomesBinningColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.BIN, true);
+       }
+       
+       @Test
+       public void testHomesOmitColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.OMIT, true);
+       }
+       
+       @Test
+       public void testHomesOmitColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.OMIT, true);
+       }
+       
+       @Test
+       public void testHomesImputeColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.IMPUTE, true);
+       }
+       
+       @Test
+       public void testHomesImputeColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.IMPUTE, true);
+       }
+       
        /**
         * 
         * @param rt
         * @param ofmt
         * @param dataset
         */
-       private void runTransformTest( RUNTIME_PLATFORM rt, String ofmt, 
TransformType type )
+       private void runTransformTest( RUNTIME_PLATFORM rt, String ofmt, 
TransformType type, boolean colnames )
        {
                //set runtime platform
                RUNTIME_PLATFORM rtold = rtplatform;
@@ -131,11 +186,11 @@ public class TransformFrameEncodeApplyTest extends 
AutomatedTestBase
                //set transform specification
                String SPEC = null; String DATASET = null;
                switch( type ) {
-                       case RECODE: SPEC = SPEC1; DATASET = DATASET1; break;
-                       case DUMMY:  SPEC = SPEC2; DATASET = DATASET1; break;
-                       case BIN:    SPEC = SPEC3; DATASET = DATASET1; break;
-                       case IMPUTE: SPEC = SPEC4; DATASET = DATASET2; break;
-                       case OMIT:   SPEC = SPEC5; DATASET = DATASET2; break;
+                       case RECODE: SPEC = colnames?SPEC1b:SPEC1; DATASET = 
DATASET1; break;
+                       case DUMMY:  SPEC = colnames?SPEC2b:SPEC2; DATASET = 
DATASET1; break;
+                       case BIN:    SPEC = colnames?SPEC3b:SPEC3; DATASET = 
DATASET1; break;
+                       case IMPUTE: SPEC = colnames?SPEC4b:SPEC4; DATASET = 
DATASET2; break;
+                       case OMIT:   SPEC = colnames?SPEC5b:SPEC5; DATASET = 
DATASET2; break;
                }
 
                if( !ofmt.equals("csv") )

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
index 09f497e..0bdf4da 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
@@ -42,7 +42,9 @@ public class TransformFrameEncodeDecodeTest extends 
AutomatedTestBase
        //dataset and transform tasks without missing values
        private final static String DATASET1    = "homes3/homes.csv";
        private final static String SPEC1               = 
"homes3/homes.tfspec_recode.json"; 
+       private final static String SPEC1b              = 
"homes3/homes.tfspec_recode2.json"; 
        private final static String SPEC2               = 
"homes3/homes.tfspec_dummy.json";
+       private final static String SPEC2b              = 
"homes3/homes.tfspec_dummy2.json";
        
        public enum TransformType {
                RECODE,
@@ -60,23 +62,43 @@ public class TransformFrameEncodeDecodeTest extends 
AutomatedTestBase
        }
        
        @Test
-       public void testHomesRecodeSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE);
+       public void testHomesRecodeIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE, false);
        }
        
        @Test
-       public void testHomesRecodeSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE);
+       public void testHomesRecodeIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE, false);
        }
        
        @Test
-       public void testHomesDummycodeSingleNodeCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY);
+       public void testHomesDummycodeIDsSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY, false);
        }
        
        @Test
-       public void testHomesDummycodeSparkCSV() {
-               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY);
+       public void testHomesDummycodeIDsSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY, false);
+       }
+       
+       @Test
+       public void testHomesRecodeColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.RECODE, true);
+       }
+       
+       @Test
+       public void testHomesRecodeColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.RECODE, true);
+       }
+       
+       @Test
+       public void testHomesDummycodeColnamesSingleNodeCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", 
TransformType.DUMMY, true);
+       }
+       
+       @Test
+       public void testHomesDummycodeColnamesSparkCSV() {
+               runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", 
TransformType.DUMMY, true);
        }
        
        /**
@@ -85,7 +107,7 @@ public class TransformFrameEncodeDecodeTest extends 
AutomatedTestBase
         * @param ofmt
         * @param dataset
         */
-       private void runTransformTest( RUNTIME_PLATFORM rt, String ofmt, 
TransformType type )
+       private void runTransformTest( RUNTIME_PLATFORM rt, String ofmt, 
TransformType type, boolean colnames )
        {
                //set runtime platform
                RUNTIME_PLATFORM rtold = rtplatform;
@@ -99,8 +121,8 @@ public class TransformFrameEncodeDecodeTest extends 
AutomatedTestBase
                //set transform specification
                String SPEC = null; String DATASET = null;
                switch( type ) {
-                       case RECODE: SPEC = SPEC1; DATASET = DATASET1; break;
-                       case DUMMY:  SPEC = SPEC2; DATASET = DATASET1; break;
+                       case RECODE: SPEC = colnames?SPEC1b:SPEC1; DATASET = 
DATASET1; break;
+                       case DUMMY:  SPEC = colnames?SPEC2b:SPEC2; DATASET = 
DATASET1; break;
                        default: throw new RuntimeException("Unsupported 
transform type for encode/decode test.");
                }
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_bin2.json
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_bin2.json 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_bin2.json
new file mode 100644
index 0000000..a5a5375
--- /dev/null
+++ b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_bin2.json
@@ -0,0 +1,5 @@
+{
+ "recode": [ zipcode, "district", "view" ], "bin": [
+ { "name": "saleprice"  , "method": "equi-width", "numbins": 3 }
+ ,{ "name": "sqft", "method": "equi-width", "numbins": 4 }]
+  }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_dummy2.json
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_dummy2.json 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_dummy2.json
new file mode 100644
index 0000000..917fab6
--- /dev/null
+++ b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_dummy2.json
@@ -0,0 +1,2 @@
+{
+ "dummycode": [ "district", "view", "zipcode" ] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_impute2.json
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_impute2.json 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_impute2.json
new file mode 100644
index 0000000..5fae74b
--- /dev/null
+++ 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_impute2.json
@@ -0,0 +1,10 @@
+{
+ "impute": [
+                 { "name": "zipcode", "method": "global_mode" }
+                ,{ "name": "district", "method": "constant", "value": "south" }
+                ,{ "name": "numbedrooms", "method": "constant", "value": "2" }
+                ,{ "name": "numbathrooms", "method": "constant", "value": "1" }
+                ,{ "name": "floors", "method": "constant", "value": "1" }
+                ,{ "name": "view", "method": "global_mode" }
+                ,{ "name": "askingprice", "method": "global_mean" }
+], "recode": [ "district", "view" ] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_omit2.json
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_omit2.json 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_omit2.json
new file mode 100644
index 0000000..2114958
--- /dev/null
+++ b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_omit2.json
@@ -0,0 +1,2 @@
+{
+ "omit": [ 
"zipcode","district","numbedrooms","numbathrooms","floors","view","saleprice","askingprice"
 ], "recode": [ "district", "view" ] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/468e472a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_recode2.json
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/functions/transform/input/homes3/homes.tfspec_recode2.json 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_recode2.json
new file mode 100644
index 0000000..d446628
--- /dev/null
+++ 
b/src/test/scripts/functions/transform/input/homes3/homes.tfspec_recode2.json
@@ -0,0 +1,2 @@
+{
+ "recode": [ "zipcode", "district", "view" ] }
\ No newline at end of file

Reply via email to