[SYSTEMML-2276] Avoid unnecessary read of empty binary matrices So far all matrices were read independently of the given number of non-zeros. For empty, and sparse matrices in general, we allocate a subset of sparse rows as synchronization pointers. This patch modifies the binary block readers to early out for known empty matrices and immediately return a non-allocated matrix block instead. This is especially helpful for remote parfor loops where empty result variables are read by all parfor tasks and thus partially contend on the shared common thread pool.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/728b2c90 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/728b2c90 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/728b2c90 Branch: refs/heads/master Commit: 728b2c904abf7f98042a78eba497bd79ebe6dea9 Parents: d34d6a6 Author: Matthias Boehm <[email protected]> Authored: Sun Apr 22 16:47:13 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Apr 22 16:47:13 2018 -0700 ---------------------------------------------------------------------- src/main/java/org/apache/sysml/runtime/io/MatrixReader.java | 1 + .../java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java | 4 ++++ .../org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java | 6 +++++- 3 files changed, 10 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/728b2c90/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java index f9df756..3109065 100644 --- a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java +++ b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java @@ -49,6 +49,7 @@ public abstract class MatrixReader { //internal configuration protected static final boolean AGGREGATE_BLOCK_NNZ = true; + protected static final boolean RETURN_EMPTY_NNZ0 = true; public abstract MatrixBlock readMatrixFromHDFS( String fname, long rlen, long clen, int brlen, int bclen, long estnnz ) throws IOException, DMLRuntimeException; http://git-wip-us.apache.org/repos/asf/systemml/blob/728b2c90/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java b/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java index a0c1ca3..9461ca1 100644 --- a/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java +++ b/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlock.java @@ -54,6 +54,10 @@ public class ReaderBinaryBlock extends MatrixReader public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz) throws IOException, DMLRuntimeException { + //early abort for known empty matrices (e.g., remote parfor result vars) + if( RETURN_EMPTY_NNZ0 && estnnz == 0 ) + return new MatrixBlock((int)rlen, (int)clen, true); + //allocate output matrix block MatrixBlock ret = createOutputMatrixBlock(rlen, clen, brlen, bclen, estnnz, false, false); http://git-wip-us.apache.org/repos/asf/systemml/blob/728b2c90/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java b/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java index 70e6e21..871abd7 100644 --- a/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java +++ b/src/main/java/org/apache/sysml/runtime/io/ReaderBinaryBlockParallel.java @@ -54,7 +54,11 @@ public class ReaderBinaryBlockParallel extends ReaderBinaryBlock @Override public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz) throws IOException, DMLRuntimeException - { + { + //early abort for known empty matrices (e.g., remote parfor result vars) + if( RETURN_EMPTY_NNZ0 && estnnz == 0 ) + return new MatrixBlock((int)rlen, (int)clen, true); + //allocate output matrix block (incl block allocation for parallel) MatrixBlock ret = createOutputMatrixBlock(rlen, clen, brlen, bclen, estnnz, true, true);
