Repository: systemml Updated Branches: refs/heads/master e368de8d4 -> ec4963552
[SYSTEMML-2016] Performance frame transformencode (meta data creation) This patch makes a minor performance improvement to transformencode. Specifically, we now reuse string builders across the construction of recode map entries in order to avoid unnecessary allocation and string copies. On a scenario of 100 iterations of transformencode of a 100K x 1 random input, the runtime improved from 25.6s to 20.1s. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec496355 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec496355 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec496355 Branch: refs/heads/master Commit: ec49635520f5ef223830c8632b661bd091d87eb3 Parents: e368de8 Author: Matthias Boehm <[email protected]> Authored: Sat Nov 18 01:00:41 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Nov 18 01:00:41 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/transform/encode/EncoderRecode.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/ec496355/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java index 8758e73..11667ce 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java +++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java @@ -156,13 +156,14 @@ public class EncoderRecode extends Encoder meta.ensureAllocatedColumns(maxDistinct); //create compact meta data representation + StringBuilder sb = new StringBuilder(); //for reuse for( int j=0; 
j<_colList.length; j++ ) { int colID = _colList[j]; //1-based int rowID = 0; if( _rcdMaps.containsKey(_colList[j]) ) for( Entry<String, Long> e : _rcdMaps.get(colID).entrySet() ) { - String tmp = constructRecodeMapEntry(e.getKey(), e.getValue()); - meta.set(rowID++, colID-1, tmp); + meta.set(rowID++, colID-1, + constructRecodeMapEntry(e.getKey(), e.getValue(), sb)); } meta.getColumnMetadata(colID-1).setNumDistinct( _rcdMaps.get(colID).size()); @@ -197,7 +198,14 @@ public class EncoderRecode extends Encoder * @return the concatenation of token and code with delimiter in between */ public static String constructRecodeMapEntry(String token, Long code) { - return token + Lop.DATATYPE_PREFIX + code.toString(); + StringBuilder sb = new StringBuilder(token.length()+16); + return constructRecodeMapEntry(token, code, sb); + } + + private static String constructRecodeMapEntry(String token, Long code, StringBuilder sb) { + sb.setLength(0); //reset reused string builder + return sb.append(token).append(Lop.DATATYPE_PREFIX) + .append(code.longValue()).toString(); } /**
