This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new d57d042 [SYSTEMDS-2856] Fix regression multi-threaded binary/ternary
operations
d57d042 is described below
commit d57d0421d069c1284b4266d47ca1cd2382c5088a
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Feb 13 21:05:40 2021 +0100
[SYSTEMDS-2856] Fix regression multi-threaded binary/ternary operations
This patch fixes performance regressions of special cases of binary
operations (early abort on empty inputs). The rework for multi-threaded
specifically use a fallback to single-threaded whenever one input is
empty. However, the allocation was moved outside the specific kernels,
leading to allocation before the early abort, which largely destroyed
the benfit of early abort.
Furthermore, we now also avoid redundant nnz maintenance per ternary
task as these are maintained anyway during computation (and now passed
back to the multi-threaded tasks).
---
.../runtime/matrix/data/LibMatrixBincell.java | 28 ++++++++++++++++++----
.../runtime/matrix/data/LibMatrixTercell.java | 17 ++++++-------
2 files changed, 33 insertions(+), 12 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
index 29363b0..44a25f4 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
@@ -164,11 +164,15 @@ public class LibMatrixBincell
*/
public static void bincellOp(MatrixBlock m1, MatrixBlock m2,
MatrixBlock ret, BinaryOperator op) {
BinaryAccessType atype = getBinaryAccessType(m1, m2);
-
- //preallocate for consistency
- if( atype == BinaryAccessType.MATRIX_MATRIX )
+
+ // preallocate for consistency (but be careful
+ // not to allocate if empty inputs might allow early abort)
+ if( atype == BinaryAccessType.MATRIX_MATRIX
+ && !(m1.isEmpty() || m2.isEmpty()) )
+ {
ret.allocateBlock(); //chosen outside
-
+ }
+
//execute binary cell operations
long nnz = 0;
if(op.sparseSafe || isSparseSafeDivide(op, m2))
@@ -755,6 +759,10 @@ public class LibMatrixBincell
private static long safeBinaryMMSparseSparse(MatrixBlock m1,
MatrixBlock m2, MatrixBlock ret,
BinaryOperator op, int rl, int ru)
{
+ //guard for postponed allocation in single-threaded exec
+ if( ret.sparse && !ret.isAllocated() )
+ ret.allocateSparseRowsBlock();
+
//both sparse blocks existing
long lnnz = 0;
if(m1.sparseBlock!=null && m2.sparseBlock!=null)
@@ -828,6 +836,10 @@ public class LibMatrixBincell
private static long safeBinaryMMSparseDenseDense(MatrixBlock m1,
MatrixBlock m2, MatrixBlock ret,
BinaryOperator op, int rl, int ru)
{
+ //guard for postponed allocation in single-threaded exec
+ if( !ret.isAllocated() )
+ ret.allocateDenseBlock();
+
//specific case in order to prevent binary search on sparse
inputs (see quickget and quickset)
final int n = ret.clen;
DenseBlock dc = ret.getDenseBlock();
@@ -912,6 +924,10 @@ public class LibMatrixBincell
private static long safeBinaryMMDenseDenseDense(MatrixBlock m1,
MatrixBlock m2, MatrixBlock ret,
BinaryOperator op, int rl, int ru)
{
+ //guard for postponed allocation in single-threaded exec
+ if( !ret.isAllocated() )
+ ret.allocateDenseBlock();
+
DenseBlock da = m1.getDenseBlock();
DenseBlock db = m2.getDenseBlock();
DenseBlock dc = ret.getDenseBlock();
@@ -943,6 +959,10 @@ public class LibMatrixBincell
//prepare second input and allocate output
MatrixBlock b = m1.sparse ? m2 : m1;
+ //guard for postponed allocation in single-threaded exec
+ if( !ret.isAllocated() )
+ ret.allocateBlock();
+
long lnnz = 0;
for( int i=rl; i<Math.min(ru, a.numRows()); i++ ) {
if( a.isEmpty(i) ) continue;
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
index cb48a1f..46f31ef 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
@@ -78,11 +78,13 @@ public class LibMatrixTercell
}
}
else {
- unsafeTernary(m1, m2, m3, ret, op, s1, s2, s3, d1, d2,
d3, 0, ret.rlen);
+ long nnz = unsafeTernary(m1, m2, m3, ret, op,
+ s1, s2, s3, d1, d2, d3, 0, ret.rlen);
+ ret.setNonZeros(nnz);
}
}
- private static void unsafeTernary(MatrixBlock m1, MatrixBlock m2,
MatrixBlock m3, MatrixBlock ret,
+ private static long unsafeTernary(MatrixBlock m1, MatrixBlock m2,
MatrixBlock m3, MatrixBlock ret,
TernaryOperator op, boolean s1, boolean s2, boolean s3, double
d1, double d2, double d3, int rl, int ru)
{
//basic ternary operations (all combinations sparse/dense)
@@ -99,7 +101,7 @@ public class LibMatrixTercell
}
//set global output nnz once
- ret.nonZeros = lnnz;
+ return lnnz;
}
private static class TercellTask implements Callable<Long> {
@@ -122,11 +124,10 @@ public class LibMatrixTercell
@Override
public Long call() {
- //execute binary operation on row partition
- unsafeTernary(_m1, _m2, _m3, _ret, _op, _s1, _s2, _s3,
_d1, _d2, _d3, _rl, _ru);
-
- //maintain block nnz (upper bounds inclusive)
- return _ret.recomputeNonZeros(_rl, _ru-1);
+ // execute binary operation on row partition
+ // (including nnz maintenance)
+ return unsafeTernary(_m1, _m2, _m3, _ret, _op,
+ _s1, _s2, _s3, _d1, _d2, _d3, _rl, _ru);
}
}
}