Repository: systemml
Updated Branches:
  refs/heads/master 7fb38b9b0 -> 9ae0a9167


[SYSTEMML-2487] Fix parfor parallelism rewrite (awareness native libs)

In case parfor does not consume all the available parallelism, we
propagate the remaining parallelism down to individual operations with
slight (max 50%) over-provisioning. For example, if we have 80 vcores and
parfor is assigned k=47, we still assign k=round(80/47)=2 to individual
operations, i.e., up to 94 threads in total.

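However, with native BLAS or DNN operations, this over-provisioning causes
JVM crashes due to SIGFPE with Intel MKL and too few memory buffers with
OpenBLAS. Hence, this patch improves the related parfor rewrite to select
the degree of parallelism with awareness of loaded native libraries: if a
native library is loaded, the per-operation parallelism is rounded down
(no over-provisioning); otherwise, the existing rounding is retained.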


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9ae0a916
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9ae0a916
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9ae0a916

Branch: refs/heads/master
Commit: 9ae0a916777dff0fa36b769f773aed6f64253f1c
Parents: 7fb38b9
Author: Matthias Boehm <[email protected]>
Authored: Wed Aug 8 14:36:15 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Wed Aug 8 14:36:54 2018 -0700

----------------------------------------------------------------------
 .../controlprogram/parfor/opt/OptimizerRuleBased.java       | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/9ae0a916/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index e7c7a6e..7adf2d8 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -82,6 +82,7 @@ import 
org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.util.ProgramConverter;
+import org.apache.sysml.utils.NativeHelper;
 import org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile;
 import 
org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.ExcludeType;
 import 
org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
@@ -1335,7 +1336,7 @@ public class OptimizerRuleBased extends Optimizer
                                                mhop.setMaxNumThreads(opsK); 
//set max constraint in hop
                                                c.setK(opsK); //set optnode k 
(for explain)
                                                //need to recompile SB, if 
changed constraint
-                                               recompileSB = true;     
+                                               recompileSB = true;
                                        }
                                        //for all other multi-threaded hops set 
k=1 to simply debugging
                                        else if( h instanceof MultiThreadedHop 
) {
@@ -1371,7 +1372,11 @@ public class OptimizerRuleBased extends Optimizer
                //compute max remaining operations parallelism k with slight 
over-provisioning 
                //such that k * tmpK <= 1.5 * opsK; note that if parfor already 
exploits the
                //maximum parallelism, this will not introduce any 
over-provisioning.
-               return (int)Math.max(Math.round((double)opsK / tmpK), 1);
+               //(when running with native BLAS/DNN libraries, we disable 
over-provisioning
+               //to avoid internal SIGFPE and allocation buffer issues w/ MKL 
and OpenBlas)
+               return NativeHelper.isNativeLibraryLoaded() ?
+                       (int) Math.max(opsK / tmpK, 1) :
+                       (int) Math.max(Math.round((double)opsK / tmpK), 1);
        }
        
        ///////

Reply via email to