Repository: systemml Updated Branches: refs/heads/master 1f63b09cd -> 2604bb3f1
[SYSTEMML-2130] Fix pack correct sparse/dense format and nnzs, II This second fix pack on correct sparse/dense representations and nnz maintenance fixes additional issues pointed out by #741. In detail, this includes: (1) Fix for the nnz maintenance of tsmm outputs in the special case of zero diagonal elements, which is very rare because it requires (except for cancellations) that entire columns are empty. (2) Conditional representation change for external UDF outputs, if the UDFs themselves output matrices in the wrong format. (3) Rand representation change for distributed spark operations and conditionally also for CP in-memory operations. (4) Improved handling of guarded sparsity checks for unary and binary operations. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2604bb3f Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2604bb3f Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2604bb3f Branch: refs/heads/master Commit: 2604bb3f12bc95ee384a92dd7cdbd4a403da387f Parents: 1f63b09 Author: Matthias Boehm <[email protected]> Authored: Tue Mar 6 19:43:31 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Tue Mar 6 19:43:31 2018 -0800 ---------------------------------------------------------------------- .../instructions/cp/ComputationCPInstruction.java | 9 +++++---- .../runtime/instructions/cp/DataGenCPInstruction.java | 5 +++++ .../runtime/instructions/spark/RandSPInstruction.java | 1 + .../apache/sysml/runtime/matrix/data/LibMatrixMult.java | 2 +- src/main/java/org/apache/sysml/udf/Matrix.java | 12 ++++++++---- 5 files changed, 20 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/2604bb3f/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java index aa66595..6af1d2c 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java @@ -58,15 +58,16 @@ public abstract class ComputationCPInstruction extends CPInstruction { } protected boolean checkGuardedRepresentationChange( MatrixBlock in1, MatrixBlock in2, MatrixBlock out ) { - if( (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE - && !CacheableData.isCachingActive()) - || out.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES) //8MB + if( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE + && !CacheableData.isCachingActive() ) return true; double memIn1 = (in1 != null) ? in1.getInMemorySize() : 0; double memIn2 = (in2 != null) ? in2.getInMemorySize() : 0; double memReq = out.isInSparseFormat() ? 
MatrixBlock.estimateSizeDenseInMemory(out.getNumRows(), out.getNumColumns()) : MatrixBlock.estimateSizeSparseInMemory(out.getNumRows(), out.getNumColumns(), out.getSparsity()); - return ( memReq < memIn1 + memIn2 ); + //guarded if mem requirements smaller than input sizes + return ( memReq < memIn1 + memIn2 + + OptimizerUtils.SAFE_REP_CHANGE_THRES ); //8MB } } http://git-wip-us.apache.org/repos/asf/systemml/blob/2604bb3f/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java index 61a8bb7..c7cf2bc 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java @@ -21,6 +21,7 @@ package org.apache.sysml.runtime.instructions.cp; import org.apache.sysml.hops.DataGenOp; import org.apache.sysml.hops.Hop.DataGenMethod; +import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.lops.DataGen; import org.apache.sysml.lops.Lop; import org.apache.sysml.parser.Expression.DataType; @@ -250,6 +251,10 @@ public class DataGenCPInstruction extends UnaryCPInstruction { soresBlock = MatrixBlock.sampleOperations(range, (int)lrows, replace, seed); } + //guarded sparse block representation change + if( soresBlock.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES ) + soresBlock.examSparsity(); + //release created output ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode()); } http://git-wip-us.apache.org/repos/asf/systemml/blob/2604bb3f/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java index 1a1633e..926e229 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java @@ -703,6 +703,7 @@ public class RandSPInstruction extends UnarySPInstruction { .createRandomMatrixGenerator(_pdf, lrlen, lclen, lrlen, lclen,_sparsity, _min, _max, _pdfParams); blk.randOperationsInPlace(rgen, null, seed); + blk.examSparsity(); return new Tuple2<>(kv._1, blk); } } http://git-wip-us.apache.org/repos/asf/systemml/blob/2604bb3f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index d5ed8b2..9d52f00 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -3671,9 +3671,9 @@ public class LibMatrixMult int bimin = Math.min(bi+blocksizeIJ, n); for( int i=bi, rix=bi*n; i<bimin; i++, rix+=n ) { LibMatrixReorg.transposeRow(c, c, rix+bi, bi*n+i, n, bimin-bi); + nnz += (c[rix+i] != 0) ? 1 : 0; //for diagonal element for( int j=rix+i+1; j<rix+bimin; j++ ) nnz += (c[j] != 0) ? 
2 : 0; - nnz++; //for diagonal element } } http://git-wip-us.apache.org/repos/asf/systemml/blob/2604bb3f/src/main/java/org/apache/sysml/udf/Matrix.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/Matrix.java b/src/main/java/org/apache/sysml/udf/Matrix.java index b75b915..2fae725 100644 --- a/src/main/java/org/apache/sysml/udf/Matrix.java +++ b/src/main/java/org/apache/sysml/udf/Matrix.java @@ -22,6 +22,7 @@ package org.apache.sysml.udf; import java.io.IOException; import org.apache.sysml.conf.ConfigurationManager; +import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.parser.Expression; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; @@ -222,14 +223,17 @@ public class Matrix extends FunctionParameter MatrixCharacteristics mc = new MatrixCharacteristics(_rows, _cols, rblen, cblen, nnz); MetaDataFormat mfmd = new MetaDataFormat(mc, oinfo, iinfo); - try - { + try { + //check for correct sparse/dense representation + if( mb.getInMemorySize() < OptimizerUtils.SAFE_REP_CHANGE_THRES ) + mb.examSparsity(); + + //construct output matrix object _mo = new MatrixObject(Expression.ValueType.DOUBLE, _filePath, mfmd); _mo.acquireModify( mb ); _mo.release(); } - catch(Exception e) - { + catch(Exception e) { throw new IOException(e); } }
