[SYSTEMML-766] Performance axpy runtime (sparse-safe, dense-dense ops)

The patch makes various performance improvements to axpy, i.e., +* and -*, some of which also apply to other operations. In a scenario with 100k x 1k dense matrices and 20 iterations, this reduced the runtime from 24s to 8.1s.
(1) PlusMultiply and MinusMultiply marked as sparse-safe binary operations (reduce runtime from 24s to 10.3s) (2) Improved dense-dense binary operations: nnz and input handling (reduced runtime from 10.3s to 8.1s). Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8a05574c Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8a05574c Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8a05574c Branch: refs/heads/master Commit: 8a05574c8f2c5f63ef39db1ff76ed5d1cb6557b2 Parents: 973b863 Author: Matthias Boehm <[email protected]> Authored: Wed Jul 20 23:56:33 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu Jul 21 12:54:19 2016 -0700 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixBincell.java | 22 +++++++++++--------- .../matrix/operators/BinaryOperator.java | 16 +++++++------- 2 files changed, 20 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8a05574c/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java index 5a0ccdb..dd0b9e0 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java @@ -35,6 +35,7 @@ import org.apache.sysml.runtime.functionobjects.NotEquals; import org.apache.sysml.runtime.functionobjects.Or; import org.apache.sysml.runtime.functionobjects.Plus; import org.apache.sysml.runtime.functionobjects.Power2; +import org.apache.sysml.runtime.functionobjects.ValueFunction; import 
org.apache.sysml.runtime.matrix.operators.BinaryOperator; import org.apache.sysml.runtime.matrix.operators.ScalarOperator; import org.apache.sysml.runtime.util.DataConverter; @@ -402,23 +403,24 @@ public class LibMatrixBincell //3) recompute nnz ret.recomputeNonZeros(); } - else if( !ret.sparse && !m1.sparse && !m2.sparse && m1.denseBlock!=null && m2.denseBlock!=null ) + else if( !ret.sparse && !m1.sparse && !m2.sparse + && m1.denseBlock!=null && m2.denseBlock!=null ) { ret.allocateDenseBlock(); final int m = ret.rlen; final int n = ret.clen; + double[] a = m1.denseBlock; + double[] b = m2.denseBlock; double[] c = ret.denseBlock; + ValueFunction fn = op.fn; - //int nnz = 0; - for( int i=0; i<m*n; i++ ) - { - c[i] = op.fn.execute(m1.denseBlock[i], m2.denseBlock[i]); - //HotSpot JVM bug causes crash in presence of NaNs - //nnz += (c[i]!=0)? 1 : 0; - if( c[i] != 0 ) - ret.nonZeros++; + //compute dense-dense binary, maintain nnz on-the-fly + int nnz = 0; + for( int i=0; i<m*n; i++ ) { + c[i] = fn.execute(a[i], b[i]); + nnz += (c[i]!=0)? 
1 : 0; } - //result.nonZeros = nnz; + ret.nonZeros = nnz; } else //generic case { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8a05574c/src/main/java/org/apache/sysml/runtime/matrix/operators/BinaryOperator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/operators/BinaryOperator.java b/src/main/java/org/apache/sysml/runtime/matrix/operators/BinaryOperator.java index a268f49..69b3e5f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/operators/BinaryOperator.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/operators/BinaryOperator.java @@ -33,32 +33,32 @@ import org.apache.sysml.runtime.functionobjects.IntegerDivide; import org.apache.sysml.runtime.functionobjects.LessThan; import org.apache.sysml.runtime.functionobjects.LessThanEquals; import org.apache.sysml.runtime.functionobjects.Minus; +import org.apache.sysml.runtime.functionobjects.MinusMultiply; import org.apache.sysml.runtime.functionobjects.MinusNz; import org.apache.sysml.runtime.functionobjects.Modulus; import org.apache.sysml.runtime.functionobjects.Multiply; import org.apache.sysml.runtime.functionobjects.NotEquals; import org.apache.sysml.runtime.functionobjects.Or; import org.apache.sysml.runtime.functionobjects.Plus; +import org.apache.sysml.runtime.functionobjects.PlusMultiply; import org.apache.sysml.runtime.functionobjects.Power; import org.apache.sysml.runtime.functionobjects.ValueFunction; import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinFunctionCode; public class BinaryOperator extends Operator implements Serializable { - private static final long serialVersionUID = -2547950181558989209L; public ValueFunction fn; public BinaryOperator(ValueFunction p) { - fn=p; - //as long as (0 op 0)=0, then op is sparseSafe - if(fn instanceof Plus || fn instanceof Multiply || fn instanceof Minus - || fn instanceof And || fn instanceof Or) - sparseSafe=true; - else - 
sparseSafe=false; + fn = p; + + //binaryop is sparse-safe iff (0 op 0) == 0 + sparseSafe = (fn instanceof Plus || fn instanceof Multiply + || fn instanceof Minus || fn instanceof And || fn instanceof Or + || fn instanceof PlusMultiply || fn instanceof MinusMultiply); } /**
