Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6550c04b9 -> ecf5e1b4c


[SYSTEMML-896] Fix mlcontext scratch-space / bufferpool cleanup, tests

Although we allow intermediate matrices and frames to be passed from
one script invocation to another, every script execution initializes the
scratch space and buffer pool, which cleans up all existing files. This
might delete files that still back pending RDD operations or
matrix/frame objects. This patch fixes this issue by initializing the
scratch space only on the first execution. Furthermore, we now also
properly clean up the scratch_space on mlcontext close.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ecf5e1b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ecf5e1b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ecf5e1b4

Branch: refs/heads/master
Commit: ecf5e1b4c4d8135bd220d380a0d824996392ecc2
Parents: 6550c04
Author: Matthias Boehm <mbo...@us.ibm.com>
Authored: Tue Sep 20 15:47:14 2016 -0700
Committer: Matthias Boehm <mbo...@us.ibm.com>
Committed: Tue Sep 20 15:47:14 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/api/DMLScript.java    |   2 +-
 .../apache/sysml/api/mlcontext/MLContext.java   |  42 +++---
 .../sysml/api/mlcontext/ScriptExecutor.java     |  34 +++--
 .../mlcontext/MLContextScratchCleanupTest.java  | 133 +++++++++++++++++++
 .../sysml/api/mlcontext/ScratchCleanup1.dml     |  29 ++++
 .../sysml/api/mlcontext/ScratchCleanup2.dml     |  24 ++++
 .../sysml/api/mlcontext/ScratchCleanup2b.dml    |  25 ++++
 7 files changed, 253 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index daf2144..a9e4a8e 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -871,7 +871,7 @@ public class DMLScript
         * @throws IOException
         * @throws ParseException
         */
-       private static void cleanupHadoopExecution( DMLConfig config ) 
+       public static void cleanupHadoopExecution( DMLConfig config ) 
                throws IOException, ParseException
        {
                //create dml-script-specific suffix

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java 
b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
index 281371d..2741c68 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
@@ -41,7 +41,6 @@ import org.apache.sysml.parser.IntIdentifier;
 import org.apache.sysml.parser.StringIdentifier;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.instructions.Instruction;
@@ -225,14 +224,6 @@ public class MLContext {
        }
 
        /**
-        * Clean up the variables from the buffer pool, including evicted files,
-        * because the buffer pool holds references.
-        */
-       public void clearCache() {
-               CacheableData.cleanupCacheDir();
-       }
-
-       /**
         * Reset configuration settings to default settings.
         */
        public void resetConfig() {
@@ -268,6 +259,7 @@ public class MLContext {
                scriptExecutor.setExplain(explain);
                scriptExecutor.setExplainLevel(explainLevel);
                scriptExecutor.setStatistics(statistics);
+               scriptExecutor.setInit(scriptHistoryStrings.isEmpty());
                return execute(script, scriptExecutor);
        }
 
@@ -552,31 +544,31 @@ public class MLContext {
        }
 
        /**
-        * Clear all the scripts, removing them from the history, and clear the
-        * cache.
+        * Closes the mlcontext, which includes the cleanup of static and local
+        * state as well as scratch space and buffer pool cleanup. Note that the
+        * spark context is not explicitly closed to allow external reuse.
         */
-       public void clear() {
-               Set<String> scriptNames = scripts.keySet();
-               for (String scriptName : scriptNames) {
-                       Script script = scripts.get(scriptName);
-                       script.clearAll();
-               }
-
-               scripts.clear();
-               scriptHistoryStrings.clear();
-
-               clearCache();
-       }
-
        public void close() {
                // reset static status (refs to sc / mlcontext)
                SparkExecutionContext.resetSparkContextStatic();
                MLContextProxy.setActive(false);
                activeMLContext = null;
 
+               // cleanup scratch space and buffer pool
+               try {
+                       DMLScript.cleanupHadoopExecution(
+                                       ConfigurationManager.getDMLConfig());
+               }
+               catch(Exception ex) {
+                       throw new MLContextException("Failed to cleanup working 
directories.", ex);
+               }
+               
                // clear local status, but do not stop sc as it
                // may be used or stopped externally
-               clear();
+               for (Script script : scripts.values()) 
+                       script.clearAll();
+               scripts.clear();
+               scriptHistoryStrings.clear();
                resetConfig();
                sc = null;
        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java 
b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
index 17bae7a..57e0076 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java
@@ -114,6 +114,7 @@ public class ScriptExecutor {
        protected Program runtimeProgram;
        protected ExecutionContext executionContext;
        protected Script script;
+       protected boolean init = false;
        protected boolean explain = false;
        protected boolean statistics = false;
        protected ExplainLevel explainLevel;
@@ -198,16 +199,16 @@ public class ScriptExecutor {
         * Output a description of the program to standard output.
         */
        protected void showExplanation() {
-               if (explain) {
-                       try {
-                               ExplainType explainType = (explainLevel != 
null) ? 
-                                               explainLevel.getExplainType() : 
ExplainType.RUNTIME;
-                               System.out.println(Explain.explain(dmlProgram, 
runtimeProgram, explainType));
-                       } 
-                       catch (Exception e) {
-                               throw new MLContextException("Exception 
occurred while explaining dml program", e);
-                       } 
-               }
+               if( !explain ) return;
+                       
+               try {
+                       ExplainType explainType = (explainLevel != null) ? 
+                                       explainLevel.getExplainType() : 
ExplainType.RUNTIME;
+                       System.out.println(Explain.explain(dmlProgram, 
runtimeProgram, explainType));
+               } 
+               catch (Exception e) {
+                       throw new MLContextException("Exception occurred while 
explaining dml program", e);
+               } 
        }
 
        /**
@@ -390,6 +391,8 @@ public class ScriptExecutor {
         * initialize caching, and reset statistics.
         */
        protected void initializeCachingAndScratchSpace() {
+               if( !init ) return;
+               
                try {
                        DMLScript.initHadoopExecution(config);
                } catch (ParseException e) {
@@ -622,6 +625,17 @@ public class ScriptExecutor {
        public void setStatistics(boolean statistics) {
                this.statistics = statistics;
        }
+       
+       /**
+        * Whether or not to initialize the scratch_space, bufferpool, etc. 
Note that any 
+        * redundant initialize (e.g., multiple scripts from one MLContext) 
clears existing 
+        * files from the scratch space and buffer pool.
+        *  
+        * @param init
+        */
+       public void setInit(boolean init) {
+               this.init = init;
+       }
 
        /**
         * Set the level of program explanation that should be displayed if 
explain

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextScratchCleanupTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextScratchCleanupTest.java
 
b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextScratchCleanupTest.java
new file mode 100644
index 0000000..3bb7f71
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextScratchCleanupTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.mlcontext;
+
+import static org.apache.sysml.api.mlcontext.ScriptFactory.dmlFromFile;
+
+import java.io.File;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.api.mlcontext.MLContext;
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.api.mlcontext.Script;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.After;
+import org.junit.Test;
+
+
+public class MLContextScratchCleanupTest extends AutomatedTestBase 
+{
+       private final static String TEST_DIR = "org/apache/sysml/api/mlcontext";
+       private final static String TEST_NAME = "MLContextScratchCleanup";
+
+       private final static int rows = 1123;
+       private final static int cols = 789;
+       
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Test
+       public void testMLContextMultipleScriptsCP() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.SINGLE_NODE, 
false);
+       }
+       
+       @Test
+       public void testMLContextMultipleScriptsHybrid() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.HYBRID_SPARK, 
false);
+       }
+       
+       @Test
+       public void testMLContextMultipleScriptsSpark() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.SPARK, false);
+       }
+       
+       @Test
+       public void testMLContextMultipleScriptsWithReadCP() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.SINGLE_NODE, 
true);
+       }
+       
+       @Test
+       public void testMLContextMultipleScriptsWithReadHybrid() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.HYBRID_SPARK, 
true);
+       }
+       
+       @Test
+       public void testMLContextMultipleScriptsWithReadSpark() {
+               runMLContextTestMultipleScript(RUNTIME_PLATFORM.SPARK, true);
+       }
+
+       /**
+        * 
+        * @param platform
+        */
+       private void runMLContextTestMultipleScript(RUNTIME_PLATFORM platform, 
boolean wRead) 
+       {
+               RUNTIME_PLATFORM oldplatform = DMLScript.rtplatform;
+               DMLScript.rtplatform = platform;
+               
+               //create mlcontext
+               SparkConf conf = new 
SparkConf().setAppName("MLContextFrameTest").setMaster("local");
+               JavaSparkContext sc = new JavaSparkContext(conf);
+               MLContext ml = new MLContext(sc);
+               ml.setExplain(true);
+               
+               String dml1 = baseDirectory + File.separator + 
"ScratchCleanup1.dml";
+               String dml2 = baseDirectory + File.separator + 
(wRead?"ScratchCleanup2b.dml":"ScratchCleanup2.dml");
+               
+               try
+               {
+                       Script script1 = dmlFromFile(dml1).in("$rows", 
rows).in("$cols", cols).out("X");
+                       Matrix X = ml.execute(script1).getMatrix("X");
+                       
+                       //clear in-memory/cached data to emulate on-disk storage
+                       X.toMatrixObject().clearData();
+                       
+                       Script script2 = dmlFromFile(dml2).in("X", X).out("z");
+                       String z = ml.execute(script2).getString("z");
+                       
+                       System.out.println(z);
+               }
+               catch(Exception ex) {
+                       throw new RuntimeException(ex);
+               }
+               finally {
+                       DMLScript.rtplatform = oldplatform;
+                       
+                       // stop spark context to allow single jvm tests 
(otherwise the
+                       // next test that tries to create a SparkContext would 
fail)
+                       sc.stop();
+                       // clear status mlcontext and spark exec context
+                       ml.close();
+               }
+       }
+
+       @After
+       public void tearDown() {
+               super.tearDown();
+       }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup1.dml
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup1.dml 
b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup1.dml
new file mode 100644
index 0000000..1fb2c7e
--- /dev/null
+++ b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup1.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=$rows, cols=$cols);
+
+#force export of X via remote parfor
+parfor(i in 1:ncol(X), opt=CONSTRAINED, mode=REMOTE_SPARK) {
+   print(sum(X[,i]));
+}
+
+write(X, "out/X", format="binary");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2.dml
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2.dml 
b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2.dml
new file mode 100644
index 0000000..ccaa618
--- /dev/null
+++ b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2.dml
@@ -0,0 +1,24 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+Y = X^2 + X;
+z = sum(Y + sum(X));
+write(z, "out/z");
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ecf5e1b4/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2b.dml
----------------------------------------------------------------------
diff --git 
a/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2b.dml 
b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2b.dml
new file mode 100644
index 0000000..f8b46d5
--- /dev/null
+++ b/src/test/scripts/org/apache/sysml/api/mlcontext/ScratchCleanup2b.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read("out/X", format="binary");
+Y = X^2 + X;
+z = sum(Y + sum(X));
+write(z, "out/z");
\ No newline at end of file

Reply via email to