Repository: incubator-systemml
Updated Branches:
  refs/heads/master 10727e5d0 -> 4288da9ee


[SYSTEMML-1660] Performance sparse removeEmpty/order (shallow copy)

Similar to dense vector transpose and dense row-wise reshape, this patch
improves performance of sparse removeEmpty(rows) and order via shallow
copy of sparse rows, exploiting the fact that removeEmpty(rows) and
order do not modify the actual sparse rows. This shallow copy (by
reference) is safe due to copy-on-write semantics and safe
update-in-place initialization. Note that the recent change of shallow
serialize of sparse matrices on buffer pool write (SYSTEMML-1140),
enabled this optimization potential, because without shallow serialize
we would still effectively copy the entire matrix.

Experiments over different matrix shapes and sparsity showed huge
improvements of orders of magnitude. For example, here are the results
for 20 iterations of the respective single-node in-memory operations:

1) removeEmpty(rows), 20 iterations: 
1M x 1K, sparsity=0.1:     10.3s -> 1.4s
100K x 10K, sparsity=0.1:  7.1s -> 160ms
10K x 100K, sparsity=0.1:  6.3s -> 21ms

2) order, 20 iterations:
1M x 1K, sparsity=0.1:     13.4s -> 3.9s
100K x 10K, sparsity=0.1:  7.7s -> 510ms
10K x 100K, sparsity=0.1:  6.5s -> 64ms

Note that the improvements increase as the number of columns, or more
precisely, the number of non-zeros per row increase because there are
additional overheads (such as selection vector extraction or the actual
sorting) in the number of rows.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4288da9e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4288da9e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4288da9e

Branch: refs/heads/master
Commit: 4288da9eec006891025379b9fc723cf0e72adeaf
Parents: 10727e5
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sat Jun 3 13:11:14 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sat Jun 3 13:11:14 2017 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 44 ++++++++++----------
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  8 +++-
 2 files changed, 28 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4288da9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index bfaf4c8..73d50eb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -60,9 +60,14 @@ import org.apache.sysml.runtime.util.UtilFunctions;
  */
 public class LibMatrixReorg 
 {
-       public static final long PAR_NUMCELL_THRESHOLD = 1024*1024;   //Min 1M 
elements
-       public static final boolean SHALLOW_DENSE_VECTOR_TRANSPOSE = true;
-       public static final boolean SHALLOW_DENSE_ROWWISE_RESHAPE = true;
+       //minimum number of elements for multi-threaded execution
+       public static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //1M
+       
+       //allow shallow dense/sparse copy for unchanged data (which is 
+       //safe due to copy-on-write and safe update-in-place handling)
+       public static final boolean SHALLOW_COPY_REORG = true;
+       
+       //allow reuse of temporary blocks for certain operations
        public static final boolean ALLOW_BLOCK_REUSE = false;
        
        private enum ReorgType {
@@ -126,7 +131,7 @@ public class LibMatrixReorg
                //since the physical representation of dense vectors is always 
the same,
                //we don't need to create a copy, given our copy on write 
semantics.
                //however, note that with update in-place this would be an 
invalid optimization
-               if( SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && !out.sparse 
&& (in.rlen==1 || in.clen==1)  ) {
+               if( SHALLOW_COPY_REORG && !in.sparse && !out.sparse && 
(in.rlen==1 || in.clen==1)  ) {
                        out.denseBlock = in.denseBlock;
                        return out;
                }
@@ -160,7 +165,7 @@ public class LibMatrixReorg
        {
                //redirect small or special cases to sequential execution
                if( in.isEmptyBlock(false) || (in.rlen * in.clen < 
PAR_NUMCELL_THRESHOLD) || k == 1
-                       || (SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && 
!out.sparse && (in.rlen==1 || in.clen==1) )
+                       || (SHALLOW_COPY_REORG && !in.sparse && !out.sparse && 
(in.rlen==1 || in.clen==1) )
                        || (in.sparse && !out.sparse && in.rlen==1) || 
(!in.sparse && out.sparse && in.rlen==1) 
                        || (!in.sparse && out.sparse) || !out.isThreadSafe())
                {
@@ -395,12 +400,11 @@ public class LibMatrixReorg
                        else //SPARSE
                        {
                                out.allocateSparseRowsBlock(false);
-                               for( int i=0; i<rlen; i++ ) {
-                                       int ix = vix[i];
-                                       if( !in.sparseBlock.isEmpty(ix) ) {
-                                               out.sparseBlock.set(i, 
in.sparseBlock.get(ix), true);
+                               for( int i=0; i<rlen; i++ )
+                                       if( !in.sparseBlock.isEmpty(vix[i]) ) {
+                                               out.sparseBlock.set(i, 
in.sparseBlock.get(vix[i]),
+                                                       !SHALLOW_COPY_REORG); 
//row remains unchanged
                                        }
-                               }
                        }
                }
                else
@@ -1085,7 +1089,7 @@ public class LibMatrixReorg
                        return;
                
                //shallow dense by-row reshape (w/o result allocation)
-               if( SHALLOW_DENSE_ROWWISE_RESHAPE && rowwise ) {
+               if( SHALLOW_COPY_REORG && rowwise ) {
                        //since the physical representation of dense matrices 
is always the same,
                        //we don't need to create a copy, given our copy on 
write semantics.
                        //however, note that with update in-place this would be 
an invalid optimization
@@ -1677,25 +1681,19 @@ public class LibMatrixReorg
                        {
                                SparseBlock a = in.sparseBlock;                 
        
                                for ( int i=0; i < m; i++ )
-                                       if ( !a.isEmpty(i) ) {
-                                               flags[i] = true;
-                                               rlen2++;
-                                       }
+                                       rlen2 += (flags[i] = !a.isEmpty(i)) ? 1 
: 0;
                        }
                        else //DENSE
                        {
                                double[] a = in.denseBlock;
-                               
-                               for(int i=0, aix=0; i<m; i++, aix+=n) {
+                               for(int i=0, aix=0; i<m; i++, aix+=n)
                                        for(int j=0; j<n; j++)
-                                               if( a[aix+j] != 0 )
-                                               {
+                                               if( a[aix+j] != 0 ) {
                                                        flags[i] = true;
                                                        rlen2++;
                                                        //early abort for 
current row
                                                        break; 
                                                }
-                               }
                        }
                } 
                else {                  
@@ -1716,8 +1714,10 @@ public class LibMatrixReorg
                {
                        //note: output dense or sparse
                        for( int i=0, cix=0; i<m; i++ )
-                               if( flags[i] )
-                                       ret.appendRow(cix++, 
in.sparseBlock.get(i));
+                               if( flags[i] ) {
+                                       ret.appendRow(cix++, 
in.sparseBlock.get(i),
+                                               !SHALLOW_COPY_REORG);
+                               }
                }
                else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
                {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4288da9e/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index e61c6a2..43adfc6 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -693,7 +693,11 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                }
        }
 
-       public void appendRow(int r, SparseRow row)
+       public void appendRow(int r, SparseRow row) {
+               appendRow(r, row, true);
+       }
+       
+       public void appendRow(int r, SparseRow row, boolean deep)
        {
                if(row == null)
                        return;
@@ -701,7 +705,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                if(sparse) {
                        //allocation on demand
                        allocateSparseRowsBlock(false);
-                       sparseBlock.set(r, row, true);
+                       sparseBlock.set(r, row, deep);
                        nonZeros += row.size();
                }
                else {

Reply via email to