[SYSTEMML-1850] Fix robustness transformdecode w/ delimiter tokens

This patch makes the construction and splitting of recode map entries
consistent and robust for special tokens that include the delimiter
(i.e., Lop.DATATYPE_PREFIX) itself.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4c3eab89
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4c3eab89
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4c3eab89

Branch: refs/heads/master
Commit: 4c3eab89f32355b2d5d898db4411d9cde6eb8c08
Parents: 585afa2
Author: Matthias Boehm <[email protected]>
Authored: Thu Aug 17 20:46:50 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Aug 18 14:15:42 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/FrameBlock.java     |  9 ++-------
 .../runtime/transform/decode/DecoderRecode.java   |  6 +++---
 .../runtime/transform/encode/EncoderRecode.java   | 18 ++++++++++++++++--
 3 files changed, 21 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/4c3eab89/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index 45ad26c..b3dc311 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -35,11 +35,11 @@ import java.util.Map;
 
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.io.Writable;
-import org.apache.sysml.lops.Lop;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysml.runtime.io.IOUtilFunctions;
+import org.apache.sysml.runtime.transform.encode.EncoderRecode;
 import org.apache.sysml.runtime.util.IndexRange;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
@@ -1049,12 +1049,7 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
                for( int i=0; i<getNumRows(); i++ ) {
                        Object val = ldata.get(i);
                        if( val != null ) {
-                               // Instead of using splitCSV which is forcing 
string with RFC-4180 format, 
-                               // using Lop.DATATYPE_PREFIX separator to split 
token and code 
-                               String[] tmp =  new String[2];
-                               int pos = 
val.toString().lastIndexOf(Lop.DATATYPE_PREFIX);
-                               tmp[0] = val.toString().substring(0, pos);
-                               tmp[1] = val.toString().substring(pos+1);
+                               String[] tmp = 
EncoderRecode.splitRecodeMapEntry(val.toString());
                                map.put(tmp[0], Long.parseLong(tmp[1]));
                        }
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/4c3eab89/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java 
b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
index c53704b..f6e8471 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java
@@ -21,12 +21,12 @@ package org.apache.sysml.runtime.transform.decode;
 
 import java.util.HashMap;
 
-import org.apache.sysml.lops.Lop;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.matrix.data.FrameBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.Pair;
 import org.apache.sysml.runtime.transform.TfUtils;
+import org.apache.sysml.runtime.transform.encode.EncoderRecode;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
 /**
@@ -82,9 +82,9 @@ public class DecoderRecode extends Decoder
                        for( int i=0; i<meta.getNumRows(); i++ ) {
                                if( meta.get(i, _colList[j]-1)==null )
                                        break; //reached end of recode map
-                               String[] tmp = meta.get(i, 
_colList[j]-1).toString().split(Lop.DATATYPE_PREFIX);                           
     
+                               String[] tmp = 
EncoderRecode.splitRecodeMapEntry(meta.get(i, _colList[j]-1).toString());
                                Object obj = 
UtilFunctions.stringToObject(_schema[_colList[j]-1], tmp[0]);
-                               map.put(Long.parseLong(tmp[1]), obj);           
                
+                               map.put(Long.parseLong(tmp[1]), obj);
                        }
                        _rcMaps[j] = map;
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/4c3eab89/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java 
b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
index 526d31e..3acf640 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
@@ -188,11 +188,25 @@ public class EncoderRecode extends Encoder
        
        /**
         * Returns the Recode map entry which consists of concatenation of 
code, delimiter and token. 
+        * 
         * @param token is part of Recode map
-        * @param code  is code for token 
-        * @return the concatenation of code and token with delimiter in between
+        * @param code  is code for token
+        * @return the concatenation of token and code with delimiter in between
         */
        public static String constructRecodeMapEntry(String token, Long code) {
                return token + Lop.DATATYPE_PREFIX + code.toString();
        }
+       
+       /**
+        * Splits a Recode map entry into its token and code.
+        * 
+        * @param value concatenation of token and code with delimiter in 
between
+        * @return string array of token and code
+        */
+       public static String[] splitRecodeMapEntry(String value) {
+               // Instead of using splitCSV which is forcing string with 
RFC-4180 format, 
+               // using Lop.DATATYPE_PREFIX separator to split token and code 
+               int pos = value.toString().lastIndexOf(Lop.DATATYPE_PREFIX);
+               return new String[] {value.substring(0, pos), 
value.substring(pos+1)};
+       }
 }

Reply via email to