[SYSTEMML-561] Performance of double frame-to-matrix casting (column access, cache blocking)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/288438b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/288438b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/288438b0

Branch: refs/heads/master
Commit: 288438b0417dbb78977caeee5cbbc421fae0c5e9
Parents: 2163bfb
Author: Matthias Boehm <[email protected]>
Authored: Tue Jun 7 19:03:19 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Tue Jun 7 19:03:19 2016 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/FrameBlock.java   | 15 ++++++++
 .../sysml/runtime/util/DataConverter.java       | 39 +++++++++++++++++---
 2 files changed, 48 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/288438b0/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index aef736b..53d2001 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -342,6 +342,21 @@ public class FrameBlock implements Writable, CacheBlock, 
Externalizable
        }
        
        /**
+        * Returns the underlying data of column c as-is (no copy is made here); the schema entry for c selects which array type the column is cast to.
+        * @param c column index, used to look up both the schema entry and the column data
+        * @return the _data field of the column's array object for STRING, BOOLEAN, INT and DOUBLE columns; null for any other value type
+        */
+       public Object getColumn(int c) {
+               switch(_schema.get(c)) {
+                       case STRING:  return 
((StringArray)_coldata.get(c))._data; 
+                       case BOOLEAN: return 
((BooleanArray)_coldata.get(c))._data;
+                       case INT:     return ((LongArray)_coldata.get(c))._data;
+                       case DOUBLE:  return 
((DoubleArray)_coldata.get(c))._data;
+                       default:      return null;
+               }
+       }
+       
+       /**
         * Get a row iterator over the frame where all fields are encoded
         * as strings independent of their value types.  
         * 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/288438b0/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index eddd2e3..6b46c6c 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -624,14 +624,41 @@ public class DataConverter
        public static MatrixBlock convertToMatrixBlock(FrameBlock frame) 
                throws DMLRuntimeException
        {
-               MatrixBlock mb = new MatrixBlock(frame.getNumRows(), 
frame.getNumColumns(), false);
+               int m = frame.getNumRows();
+               int n = frame.getNumColumns();
+               MatrixBlock mb = new MatrixBlock(m, n, false);
+               mb.allocateDenseBlock();
                
                List<ValueType> schema = frame.getSchema();
-               for( int i=0; i<frame.getNumRows(); i++ ) 
-                       for( int j=0; j<frame.getNumColumns(); j++ ) {
-                               mb.appendValue(i, j, 
UtilFunctions.objectToDouble(
-                                               schema.get(j), frame.get(i, 
j)));
-                       }
+               int dFreq = Collections.frequency(schema, ValueType.DOUBLE);
+               
+               if( dFreq == schema.size() ) {
+                       // special case double schema (without cell-object 
creation, 
+                       // cache-friendly row-column copy)
+                       double[][] a = new double[n][];
+                       double[] c = mb.getDenseBlock();
+                       for( int j=0; j<n; j++ )
+                               a[j] = (double[])frame.getColumn(j);            
        
+                       int blocksizeIJ = 16; //blocks of a+overhead/c in L1 
cache
+                       for( int bi=0; bi<m; bi+=blocksizeIJ )
+                               for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+                                       int bimin = Math.min(bi+blocksizeIJ, m);
+                                       int bjmin = Math.min(bj+blocksizeIJ, n);
+                                       for( int i=bi, aix=bi*n; i<bimin; i++, 
aix+=n )
+                                               for( int j=bj; j<bjmin; j++ )
+                                                       c[aix+j] = a[j][i];
+                               }
+               }
+               else { 
+                       //general case
+                       for( int i=0; i<frame.getNumRows(); i++ ) 
+                               for( int j=0; j<frame.getNumColumns(); j++ ) {
+                                       mb.appendValue(i, j, 
UtilFunctions.objectToDouble(
+                                                       schema.get(j), 
frame.get(i, j)));
+                               }
+               }
+               
+               //post-processing
                mb.examSparsity();
                
                return mb;

Reply via email to