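[SYSTEMML-2182] Fine-tuning parfor optimizer (avoid spark ctx creation). This patch improves the parfor optimizer rewrite that selects between local and remote execution types. We now also take into account whether the Spark context has already been created: the reduced (1/10) data size threshold of the rule based on the maximum number of inner iterations is applied only if the context already exists; otherwise the full threshold is used. On small data such as the perftest univariate statistics scenario with a 10K x 1K input (run through spark-submit), this patch improved performance from 38s to 4s.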
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a83c2501 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a83c2501 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a83c2501 Branch: refs/heads/master Commit: a83c2501974aff9729c3e31b130b9efa731ab336 Parents: 880cc66 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sun Mar 11 19:28:43 2018 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sun Mar 11 19:29:06 2018 -0700 ---------------------------------------------------------------------- .../runtime/controlprogram/parfor/opt/OptimizerRuleBased.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/a83c2501/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java index 49e619d..584a1fa 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java @@ -893,8 +893,11 @@ public class OptimizerRuleBased extends Optimizer //rule-based decision based on number of outer iterations or maximum number of //inner iterations (w/ appropriately scaled minimum data size threshold); + boolean isCtxCreated = OptimizerUtils.isSparkExecutionMode() + && SparkExecutionContext.isSparkContextCreated(); return (_N >= PROB_SIZE_THRESHOLD_REMOTE && M > PROB_SIZE_THRESHOLD_MB) - || (_Nmax >= 10 * PROB_SIZE_THRESHOLD_REMOTE && M > PROB_SIZE_THRESHOLD_MB/10); + || (_Nmax >= 10 * PROB_SIZE_THRESHOLD_REMOTE + && M > 
PROB_SIZE_THRESHOLD_MB/(isCtxCreated?10:1)); } protected boolean isCPOnlyPossible( OptNode n, double memBudget )