Repository: systemml Updated Branches: refs/heads/master 8a144f2b3 -> fab31fd1f
[SYSTEMML-445] Fixed the error handling during GPU memory cleanup. If an error occurs during cleanup of temporary memory and freeing of the GPU contexts, SystemML does not display the correct error message. This commit fixes this issue. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/fab31fd1 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/fab31fd1 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/fab31fd1 Branch: refs/heads/master Commit: fab31fd1f3b8c832641ba2cd8f2a678ecdfcf043 Parents: 8a144f2 Author: Niketan Pansare <npan...@us.ibm.com> Authored: Tue Oct 9 13:36:45 2018 -0700 Committer: Niketan Pansare <npan...@us.ibm.com> Committed: Tue Oct 9 13:36:45 2018 -0700 ---------------------------------------------------------------------- .../apache/sysml/api/ScriptExecutorUtils.java | 45 ++++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/fab31fd1/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java index e32fa29..9956518 100644 --- a/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java +++ b/src/main/java/org/apache/sysml/api/ScriptExecutorUtils.java @@ -75,6 +75,7 @@ public class ScriptExecutorUtils { boolean exceptionThrown = false; Statistics.startRunTimer(); + Exception finalizeException = null; try { // run execute (w/ exception handling to ensure proper shutdown) if (ConfigurationManager.isGPU() && ec != null) { @@ -92,29 +93,34 @@ public class ScriptExecutorUtils { throw e; } finally { // ensure cleanup/shutdown if (ConfigurationManager.isGPU() && !ec.getGPUContexts().isEmpty()) { - // 
----------------------------------------------------------------- - // The below code pulls the output variables on the GPU to the host. This is required especially when: - // The output variable was generated as part of a MLContext session with GPU enabled - // and was passed to another MLContext with GPU disabled - // The above scenario occurs in our gpu test suite (eg: BatchNormTest). - if(outputVariables != null) { - for(String outVar : outputVariables) { - Data data = ec.getVariable(outVar); - if(data != null && data instanceof MatrixObject) { - for(GPUContext gCtx : ec.getGPUContexts()) { - GPUObject gpuObj = ((MatrixObject)data).getGPUObject(gCtx); - if(gpuObj != null && gpuObj.isDirty()) { - gpuObj.acquireHostRead(null); + try { + // ----------------------------------------------------------------- + // The below code pulls the output variables on the GPU to the host. This is required especially when: + // The output variable was generated as part of a MLContext session with GPU enabled + // and was passed to another MLContext with GPU disabled + // The above scenario occurs in our gpu test suite (eg: BatchNormTest). 
+ if(outputVariables != null) { + for(String outVar : outputVariables) { + Data data = ec.getVariable(outVar); + if(data != null && data instanceof MatrixObject) { + for(GPUContext gCtx : ec.getGPUContexts()) { + GPUObject gpuObj = ((MatrixObject)data).getGPUObject(gCtx); + if(gpuObj != null && gpuObj.isDirty()) { + gpuObj.acquireHostRead(null); + } } } } } + // ----------------------------------------------------------------- + for(GPUContext gCtx : ec.getGPUContexts()) { + gCtx.clearTemporaryMemory(); + } + GPUContextPool.freeAllGPUContexts(); + } catch (Exception e1) { + exceptionThrown = true; + finalizeException = e1; // do not throw exception while cleanup } - // ----------------------------------------------------------------- - for(GPUContext gCtx : ec.getGPUContexts()) { - gCtx.clearTemporaryMemory(); - } - GPUContextPool.freeAllGPUContexts(); } if( ConfigurationManager.isCodegenEnabled() ) SpoofCompiler.cleanupCodeGenerator(); @@ -126,6 +132,9 @@ public class ScriptExecutorUtils { statisticsMaxHeavyHitters : ConfigurationManager.getDMLOptions().getStatisticsMaxHeavyHitters())); ConfigurationManager.resetStatistics(); } + if(finalizeException != null) { + throw new DMLRuntimeException("Error occured while GPU memory cleanup.", finalizeException); + } } }