Repository: incubator-systemml Updated Branches: refs/heads/master 44d7a8857 -> 0f8b19703
[SYSTEMML-1548] Minor performance improvements ultra-sparse matrix read This patch makes the following minor performance improvements for reading ultra-sparse matrices: (1) Avoid unnecessary CSR matrix block conversion before persisting ultra-sparse matrices into a serialized storage level. (2) Exploit sparse row scalars in the context of row allocation with an estimated number of non-zeros. (3) Fix synchronization point allocation for sparse/ultra-sparse matrix reads (corrupted number of non-zeros). (4) Avoid row copies on sparse block append if the right-hand-side block is not in MCSR format (unnecessary temporary allocation). Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0f8b1970 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0f8b1970 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0f8b1970 Branch: refs/heads/master Commit: 0f8b19703d446ed6f987167975a2deb506e0fd92 Parents: 44d7a88 Author: Matthias Boehm <[email protected]> Authored: Sat May 6 20:32:59 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat May 6 20:48:09 2017 -0700 ---------------------------------------------------------------------- src/main/java/org/apache/sysml/parser/DMLTranslator.java | 1 - .../runtime/instructions/spark/CheckpointSPInstruction.java | 6 ++++-- src/main/java/org/apache/sysml/runtime/io/MatrixReader.java | 5 +++-- .../java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java | 3 ++- .../org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java | 6 ++++-- 5 files changed, 13 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/parser/DMLTranslator.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java index 9f63038..daf00b7 100644 --- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java +++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java @@ -25,7 +25,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; -import org.antlr.v4.parse.ANTLRParser.option_return; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sysml.conf.ConfigurationManager; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java index 1fa30b6..cddfd12 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java @@ -22,6 +22,7 @@ package org.apache.sysml.runtime.instructions.spark; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.storage.StorageLevel; import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.lops.Checkpoint; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.caching.CacheableData; @@ -121,10 +122,11 @@ public class CheckpointSPInstruction extends UnarySPInstruction out = ((JavaPairRDD<Long,FrameBlock>)in) .mapValues(new CopyFrameBlockFunction(false)); } - + //convert mcsr into memory-efficient csr if potentially sparse if( input1.getDataType()==DataType.MATRIX - && OptimizerUtils.checkSparseBlockCSRConversion(mcIn) ) + && 
OptimizerUtils.checkSparseBlockCSRConversion(mcIn) + && !_level.equals(Checkpoint.SER_STORAGE_LEVEL) ) { out = ((JavaPairRDD<MatrixIndexes,MatrixBlock>)out) .mapValues(new CreateSparseBlockFunction(SparseBlock.Type.CSR)); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java index ffe290e..11e306e 100644 --- a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java +++ b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java @@ -95,7 +95,8 @@ public abstract class MatrixReader * @throws IOException if IOException occurs * @throws DMLRuntimeException if DMLRuntimeException occurs */ - protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse ) + protected static MatrixBlock createOutputMatrixBlock( long rlen, long clen, + int bclen, int brlen, long estnnz, boolean mallocDense, boolean mallocSparse ) throws IOException, DMLRuntimeException { //check input dimension @@ -116,7 +117,7 @@ public abstract class MatrixReader if( sblock instanceof SparseBlockMCSR && clen > bclen //multiple col blocks && clen > 0 && bclen > 0 && rlen > 0 && brlen > 0 ) { //all dims known for( int i=0; i<rlen; i+=brlen ) - ret.getSparseBlock().allocate(i, Math.min((int)(estnnz/rlen),1), (int)clen); + sblock.allocate(i, Math.max((int)(estnnz/rlen),1), (int)clen); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 4172e44..ff6a007 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -723,7 +723,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab int aix = rowoffset+i; //single block append (avoid re-allocations) - if( sparseBlock.isEmpty(aix) && coloffset==0 ) { + if( sparseBlock.isEmpty(aix) && coloffset==0 + && b instanceof SparseBlockMCSR ) { sparseBlock.set(aix, b.get(i), deep); } else { //general case http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java index 9952fab..2c04865 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java @@ -134,8 +134,10 @@ public class SparseBlockMCSR extends SparseBlock @Override public void allocate(int r, int ennz, int maxnnz) { - if( _rows[r] == null ) - _rows[r] = new SparseRowVector(ennz, maxnnz); + if( _rows[r] == null ) { + _rows[r] = (ennz == 1) ? new SparseRowScalar() : + new SparseRowVector(ennz, maxnnz); + } } @Override
