Repository: systemml
Updated Branches:
  refs/heads/master e368de8d4 -> ec4963552


[SYSTEMML-2016] Performance frame transformencode (meta data creation)

This patch makes a minor performance improvement to transformencode.
Specifically, we now reuse string builders across the construction
of recode map entries in order to avoid unnecessary allocation and
string copies. In a scenario of 100 iterations of transformencode of a
100K x 1 random input, the runtime improved from 25.6s to 20.1s.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec496355
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec496355
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec496355

Branch: refs/heads/master
Commit: ec49635520f5ef223830c8632b661bd091d87eb3
Parents: e368de8
Author: Matthias Boehm <[email protected]>
Authored: Sat Nov 18 01:00:41 2017 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sat Nov 18 01:00:41 2017 -0800

----------------------------------------------------------------------
 .../sysml/runtime/transform/encode/EncoderRecode.java | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ec496355/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java 
b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
index 8758e73..11667ce 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
@@ -156,13 +156,14 @@ public class EncoderRecode extends Encoder
                meta.ensureAllocatedColumns(maxDistinct);
                
                //create compact meta data representation
+               StringBuilder sb = new StringBuilder(); //for reuse
                for( int j=0; j<_colList.length; j++ ) {
                        int colID = _colList[j]; //1-based
                        int rowID = 0;
                        if( _rcdMaps.containsKey(_colList[j]) )
                                for( Entry<String, Long> e : 
_rcdMaps.get(colID).entrySet() ) {
-                                       String tmp = 
constructRecodeMapEntry(e.getKey(), e.getValue());
-                                       meta.set(rowID++, colID-1, tmp); 
+                                       meta.set(rowID++, colID-1, 
+                                               
constructRecodeMapEntry(e.getKey(), e.getValue(), sb)); 
                                }
                        meta.getColumnMetadata(colID-1).setNumDistinct(
                                        _rcdMaps.get(colID).size());
@@ -197,7 +198,14 @@ public class EncoderRecode extends Encoder
         * @return the concatenation of token and code with delimiter in between
         */
        public static String constructRecodeMapEntry(String token, Long code) {
-               return token + Lop.DATATYPE_PREFIX + code.toString();
+               StringBuilder sb = new StringBuilder(token.length()+16);
+               return constructRecodeMapEntry(token, code, sb);
+       }
+       
+       private static String constructRecodeMapEntry(String token, Long code, 
StringBuilder sb) {
+               sb.setLength(0); //reset reused string builder
+               return sb.append(token).append(Lop.DATATYPE_PREFIX)
+                       .append(code.longValue()).toString();
        }
        
        /**

Reply via email to