[SYSTEMML-2267] Generalized multi-threaded unary ops for dense blocks >16GB

This patch generalizes the newly introduced multi-threaded unary
operations to large dense blocks >16GB by processing one physical block
at a time via Arrays.parallelSetAll.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2f278bc2
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2f278bc2
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2f278bc2

Branch: refs/heads/master
Commit: 2f278bc2ac85d391b9353124ce85b7db884cba5b
Parents: c1a7f85
Author: Matthias Boehm <[email protected]>
Authored: Fri Apr 20 19:44:37 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Fri Apr 20 19:44:37 2018 -0700

----------------------------------------------------------------------
 .../apache/sysml/runtime/matrix/data/MatrixBlock.java    | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/2f278bc2/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index bb5e79b..9e032b6 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2584,15 +2584,18 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        else
                                LibMatrixAgg.cumaggregateUnaryMatrix(this, ret, 
op);
                }
-               else if(!sparse && !isEmptyBlock(false) && 
getDenseBlock().isContiguous()
+               else if(!sparse && !isEmptyBlock(false)
                        && 
OptimizerUtils.isMaxLocalParallelism(op.getNumThreads())) {
                        //note: we apply multi-threading in a best-effort 
manner here
                        //only for expensive operators such as exp, log, 
sigmoid, because
                        //otherwise allocation, read and write anyway dominates
                        ret.allocateDenseBlock(false);
-                       double[] a = getDenseBlockValues();
-                       double[] c = ret.getDenseBlockValues();
-                       Arrays.parallelSetAll(c, i -> op.fn.execute(a[i]));
+                       DenseBlock a = getDenseBlock();
+                       DenseBlock c = ret.getDenseBlock();
+                       for(int bi=0; bi<a.numBlocks(); bi++) {
+                               double[] avals = a.valuesAt(bi), cvals = 
c.valuesAt(bi);
+                               Arrays.parallelSetAll(cvals, i -> 
op.fn.execute(avals[i]));
+                       }
                        ret.recomputeNonZeros();
                }
                else {

Reply via email to