[SYSTEMML-2038] Memory-efficiency dense-sparse matrix conversion (CSR) This patch reworks the dense-sparse matrix block conversion for improved memory efficiency. This operation primitive knows the number of non-zeros upfront and is a simply sequential operation. In order to improve memory-efficiency of sparse intermediates, we now output the target in CSR instead of MCSR format.
Furthermore, this also patch cleans up unnecessary (and misleading) statistics maintenance in these conversion primitives, because these are never called in an operation-specific manner (except for left indexing operations). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c965a88a Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c965a88a Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c965a88a Branch: refs/heads/master Commit: c965a88aac7edd891d4edfddc02306d60b827c96 Parents: f13f697 Author: Matthias Boehm <mboe...@gmail.com> Authored: Wed Dec 6 20:14:39 2017 -0800 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Thu Dec 7 13:36:11 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/MatrixBlock.java | 90 +++++++------------- 1 file changed, 30 insertions(+), 60 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/c965a88a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index ac0655c..2f69e5d 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -1033,7 +1033,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab { //determine target representation boolean sparseDst = evalSparseFormatInMemory(); - + //check for empty blocks (e.g., sparse-sparse) if( isEmptyBlock(false) ) cleanupBlock(true, true); @@ -1041,9 +1041,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab //change representation if required (also done for //empty blocks in order to set representation flags) if( sparse && !sparseDst) - sparseToDense(opcode); + sparseToDense(); else if( !sparse && sparseDst ) - denseToSparse(opcode); + denseToSparse(); } /** @@ -1095,74 +1095,48 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab //////// - // basic block handling functions - - void denseToSparse() - { - denseToSparse(null); - } + // basic block handling functions - void denseToSparse(String opcode) - { - long t1 = opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0; - - //set target representation + private void denseToSparse() + { + //set target representation, early abort on empty blocks sparse = true; - - //early abort on empty blocks - if(denseBlock==null) + if( denseBlock==null ) return; - //allocate sparse target block (reset required to maintain nnz again) - allocateSparseRowsBlock(); - reset(); - - //copy dense to sparse with (1) row pre-allocation to avoid repeated - //allocation on append, and (2) nnz re-computation double[] a = denseBlock; - SparseBlock c = sparseBlock; final int m = rlen; final int n = clen; - long nnz = 0; - for( int i=0, aix=0; i<m; i++, aix+=n ) { - //recompute nnz per row (not via recomputeNonZeros as sparse allocated) - int lnnz = 0; - for(int j=0; j<n; j++) - lnnz += (a[aix+j]!=0) ? 1 : 0; - if( lnnz <= 0 ) continue; - - //allocate sparse row and append non-zero values - c.allocate(i, lnnz); - for(int j=0; j<n; j++) { - double val = a[aix+j]; - if( val != 0 ) - c.append(i, j, val); + //allocate target in memory-efficient CSR format + int nnz = (int) nonZeros; + int[] rptr = new int[m+1]; + int[] indexes = new int[nnz]; + double[] values = new double[nnz]; + for( int i=0, pos=0, aix=0; i<m; i++ ) { + for(int j=0; j<n; j++, aix++) { + double aval = a[aix]; + if( aval != 0 ) { + indexes[pos] = j; + values[pos] = aval; + pos++; + } } - nnz += lnnz; + rptr[i+1]=pos; } - + sparseBlock = new SparseBlockCSR( + rptr, indexes, values, nnz); + //update nnz and cleanup dense block nonZeros = nnz; denseBlock = null; - if(opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS) { - long t2 = System.nanoTime(); - GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_DENSE_TO_SPARSE, t2-t1); - } } - public void sparseToDense() throws DMLRuntimeException { - sparseToDense(null); - } - - public void sparseToDense(String opcode) + public void sparseToDense() throws DMLRuntimeException { - long t1 = opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0; - //set target representation + //set target representation, early abort on empty blocks sparse = false; - - //early abort on empty blocks if(sparseBlock==null) return; @@ -1192,10 +1166,6 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab //cleanup sparse rows sparseBlock = null; - if(opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS) { - long t2 = System.nanoTime(); - GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_SPARSE_TO_DENSE, t2-t1); - } } /** @@ -2987,7 +2957,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab denseToSparse(); else if(!resultSparse.sparse && this.sparse) sparseToDense(); - + //core binary cell operation LibMatrixBincell.bincellOpInPlace(this, that, op); } @@ -3762,9 +3732,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab //ensure that the current block adheres to the sparsity estimate //and thus implicitly the memory budget used by the compiler if( result.sparse && !sp ) - result.sparseToDense(opcode); + result.sparseToDense(); else if( !result.sparse && sp ) - result.denseToSparse(opcode); + result.denseToSparse(); //ensure right sparse block representation to prevent serialization if( result.sparse && update != UpdateType.INPLACE_PINNED ) {