Repository: incubator-systemml Updated Branches: refs/heads/master 5decbe64b -> 38d087a76
Minor improvement of parfor eager caching (avoid count if cached) This patch avoids the count (as used for eager caching to avoid contention of concurrent jobs) if the rdd is already cached. This improvement primarily applies to very large data that exceeds aggregated memory, where the count constitutes an expensive disk-based scan over all partitions. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0750f35a Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0750f35a Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0750f35a Branch: refs/heads/master Commit: 0750f35a072694f1540ce91d3117ff478b2caf96 Parents: 5decbe6 Author: Matthias Boehm <mbo...@us.ibm.com> Authored: Tue Sep 20 22:55:54 2016 -0700 Committer: Matthias Boehm <mbo...@us.ibm.com> Committed: Tue Sep 20 22:55:54 2016 -0700 ---------------------------------------------------------------------- .../runtime/controlprogram/context/SparkExecutionContext.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0750f35a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java index 02c900d..f95e0d4 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java @@ -1281,8 +1281,9 @@ public class SparkExecutionContext extends ExecutionContext JavaPairRDD<MatrixIndexes,MatrixBlock> in = (JavaPairRDD<MatrixIndexes, 
MatrixBlock>) getRDDHandleForMatrixObject(mo, InputInfo.BinaryBlockInputInfo); - //persist rdd (force rdd caching) - in.count(); //trigger caching to prevent contention + //persist rdd (force rdd caching, if not already cached) + if( !isRDDCached(in.id()) ) + in.count(); //trigger caching to prevent contention } /**