[SYSTEMML-2017] Fix missing bufferpool cleanup of frame intermediates So far any rmvar, cpvar, mvvar instructions only trigger the cleanup of buffer pool in-memory objects, evicted files and hdfs files for matrices but not frames. This creates a leak of objects and files that causes severe performance issues if frame intermediates are creates in iterative algorithms. This patch now generalizes all related code paths to CacheableData which is the super class of MatrixObjects and FrameObjects.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/7dc61c05 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/7dc61c05 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/7dc61c05 Branch: refs/heads/master Commit: 7dc61c05b609aec4afc5a408cc8fe5463bdeb7aa Parents: 1e0a415 Author: Matthias Boehm <mboe...@gmail.com> Authored: Thu Nov 16 00:04:10 2017 -0800 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Thu Nov 16 00:11:52 2017 -0800 ---------------------------------------------------------------------- src/main/java/org/apache/sysml/parser/Expression.java | 3 +++ .../runtime/controlprogram/ParForProgramBlock.java | 4 ++-- .../controlprogram/context/ExecutionContext.java | 2 +- .../controlprogram/context/SparkExecutionContext.java | 2 +- .../controlprogram/parfor/opt/OptimizerRuleBased.java | 2 +- .../instructions/cp/FunctionCallCPInstruction.java | 10 +++++----- .../instructions/cp/VariableCPInstruction.java | 14 +++++++------- 7 files changed, 20 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/parser/Expression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/Expression.java b/src/main/java/org/apache/sysml/parser/Expression.java index 1d7303e..fa68870 100644 --- a/src/main/java/org/apache/sysml/parser/Expression.java +++ b/src/main/java/org/apache/sysml/parser/Expression.java @@ -173,6 +173,9 @@ public abstract class Expression implements ParseInfo public boolean isMatrix() { return (this == MATRIX); } + public boolean isFrame() { + return (this == FRAME); + } public boolean isScalar() { return (this == SCALAR); } http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java index 4775494..d534fe0 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java @@ -1239,7 +1239,7 @@ public class ParForProgramBlock extends ForProgramBlock for( MatrixObject tmp : in ) { //check for empty inputs (no iterations executed) if( tmp != null && tmp != out ) - ec.cleanupMatrixObject(tmp); + ec.cleanupCacheableData(tmp); } } @@ -1705,7 +1705,7 @@ public class ParForProgramBlock extends ForProgramBlock //cleanup existing var Data exdata = ec.removeVariable(var); if( exdata != null && exdata != outNew && exdata instanceof MatrixObject ) - ec.cleanupMatrixObject((MatrixObject)exdata); + ec.cleanupCacheableData((MatrixObject)exdata); //cleanup of intermediate result variables cleanWorkerResultVariables( ec, out, in ); http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java index 67e91b0..af8c8b9 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java @@ -608,7 +608,7 @@ public class ExecutionContext { return ret; } - public void cleanupMatrixObject(MatrixObject mo) + public void cleanupCacheableData(CacheableData<?> mo) throws DMLRuntimeException { //early abort w/o scan of symbol table if no cleanup required http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java index ff47b3a..39e52f5 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java @@ -1087,7 +1087,7 @@ public class SparkExecutionContext extends ExecutionContext } @Override - public void cleanupMatrixObject( MatrixObject mo ) + public void cleanupCacheableData( CacheableData<?> mo ) throws DMLRuntimeException { //NOTE: this method overwrites the default behavior of cleanupMatrixObject http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java index eca87da..3e99128 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java @@ -2078,7 +2078,7 @@ public class OptimizerRuleBased extends Optimizer { //replace existing matrix object with empty matrix MatrixObject mo = (MatrixObject)dat; - ec.cleanupMatrixObject(mo); + ec.cleanupCacheableData(mo); ec.setMatrixOutput(rvar, new MatrixBlock((int)mo.getNumRows(), (int)mo.getNumColumns(),false), null); //keep track of cleaned result variables http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java index b36365c..953c365 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java @@ -32,7 +32,7 @@ import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.DMLScriptException; import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; -import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; +import org.apache.sysml.runtime.controlprogram.caching.CacheableData; import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory; import org.apache.sysml.runtime.instructions.Instruction; @@ -180,8 +180,8 @@ public class FunctionCallCPInstruction extends CPInstruction { if( expectRetVars.contains(var.getKey()) ) continue; //cleanup unexpected return values to avoid leaks - if( var.getValue() instanceof MatrixObject ) - fn_ec.cleanupMatrixObject((MatrixObject)var.getValue()); + if( var.getValue() instanceof CacheableData ) + fn_ec.cleanupCacheableData((CacheableData<?>)var.getValue()); } // Unpin the pinned variables @@ -196,8 +196,8 @@ public class FunctionCallCPInstruction extends CPInstruction { //cleanup existing data bound to output variable name Data exdata = ec.removeVariable(boundVarName); - if ( exdata != null && exdata instanceof MatrixObject && exdata != boundValue ) { - ec.cleanupMatrixObject( (MatrixObject)exdata ); + if ( exdata != null && exdata instanceof CacheableData && exdata != boundValue ) { + ec.cleanupCacheableData( (CacheableData<?>)exdata ); } //add/replace data in symbol table http://git-wip-us.apache.org/repos/asf/systemml/blob/7dc61c05/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java index 92750f7..9e81b50 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java @@ -698,13 +698,13 @@ public class VariableCPInstruction extends CPInstruction { + "for variable name:" + getInput1().getName() + ", while processing instruction "); } - if( getInput2().getDataType().isMatrix() ) { + if( getInput2().getDataType().isMatrix() || getInput2().getDataType().isFrame() ) { // remove existing variable bound to target name Data tgt = ec.removeVariable(getInput2().getName()); //cleanup matrix data on fs/hdfs (if necessary) - if ( tgt != null && tgt instanceof MatrixObject ) { - ec.cleanupMatrixObject((MatrixObject) tgt); + if ( tgt != null && tgt instanceof CacheableData ) { + ec.cleanupCacheableData((CacheableData<?>) tgt); } } @@ -754,8 +754,8 @@ public class VariableCPInstruction extends CPInstruction { Data input2_data = ec.removeVariable(getInput2().getName()); //cleanup matrix data on fs/hdfs (if necessary) - if ( input2_data != null && input2_data instanceof MatrixObject ) { - ec.cleanupMatrixObject((MatrixObject) input2_data); + if ( input2_data != null && input2_data instanceof CacheableData ) { + ec.cleanupCacheableData((CacheableData<?>) input2_data); } // do the actual copy! @@ -820,8 +820,8 @@ public class VariableCPInstruction extends CPInstruction { throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + varname + ", while processing rmvar instruction."); //cleanup matrix data on fs/hdfs (if necessary) - if ( input1_data instanceof MatrixObject ) { - ec.cleanupMatrixObject( (MatrixObject) input1_data ); + if ( input1_data instanceof CacheableData ) { + ec.cleanupCacheableData( (CacheableData<?>) input1_data ); } }