Repository: systemml
Updated Branches:
  refs/heads/master 8af7a9ea3 -> 6bb136911


[MINOR] Improved JMLC memory profiling, command line arg -mem

Closes #797.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6bb13691
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6bb13691
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6bb13691

Branch: refs/heads/master
Commit: 6bb136911b290b8c75ee16ea7da199bcc9dd0ba6
Parents: 8af7a9e
Author: Anthony Thomas <[email protected]>
Authored: Thu Jul 12 21:30:22 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu Jul 12 21:30:23 2018 -0700

----------------------------------------------------------------------
 docs/jmlc.md                                    |   8 +-
 .../java/org/apache/sysml/api/DMLScript.java    | 148 ++++++++-----------
 .../org/apache/sysml/api/jmlc/Connection.java   |   2 +-
 .../controlprogram/LocalVariableMap.java        |   2 +-
 .../runtime/controlprogram/ProgramBlock.java    |   2 +-
 .../controlprogram/caching/CacheableData.java   |  14 +-
 .../context/ExecutionContext.java               |   3 +
 .../context/SparkExecutionContext.java          |   3 +
 .../java/org/apache/sysml/utils/Statistics.java |  38 +----
 9 files changed, 79 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/docs/jmlc.md
----------------------------------------------------------------------
diff --git a/docs/jmlc.md b/docs/jmlc.md
index a703d01..08d1688 100644
--- a/docs/jmlc.md
+++ b/docs/jmlc.md
@@ -55,12 +55,8 @@ JMLC can be configured to gather runtime statistics, as in 
the MLContext API, by
 method with a value of `true`. JMLC can also be configured to gather 
statistics on the memory used by matrices and
 frames in the DML script. To enable collection of memory statistics, call 
Connection's `gatherMemStats()` method
 with a value of `true`. When finegrained statistics are enabled in 
`SystemML.conf`, JMLC will also report the variables
-in the DML script which used the most memory. By default, the memory use 
reported will be an overestimte of the actual
-memory required to run the program. When finegrained statistics are enabled, 
JMLC will gather more accurate statistics
-by keeping track of garbage collection events and reducing the memory estimate 
accordingly. The most accurate way to
-determine the memory required by a script is to run the script in a single 
thread and enable finegrained statistics.
-
-An example showing how to enable statistics in JMLC is presented in the 
section below.
+in the DML script which used the most memory. An example showing how to enable 
statistics in JMLC is presented in the
+section below.
 
 ---
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java 
b/src/main/java/org/apache/sysml/api/DMLScript.java
index 9ecd83b..227c14b 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -97,11 +97,11 @@ import org.apache.sysml.yarn.DMLYarnClientProxy;
 public class DMLScript 
 {      
        public enum RUNTIME_PLATFORM { 
-               HADOOP,             // execute all matrix operations in MR
+               HADOOP,         // execute all matrix operations in MR
                SINGLE_NODE,    // execute all matrix operations in CP
                HYBRID,         // execute matrix operations in CP or MR
                HYBRID_SPARK,   // execute matrix operations in CP or Spark
-               SPARK                   // execute matrix operations in Spark
+               SPARK           // execute matrix operations in Spark
        }
        
        /**
@@ -130,7 +130,8 @@ public class DMLScript
                public String               configFile    = null;             
// Path to config file if default config and default config is to be overriden
                public boolean              clean         = false;            
// Whether to clean up all SystemML working directories (FS, DFS)
                public boolean              stats         = false;            
// Whether to record and print the statistics
-               public int                  statsCount    = 10;               
// Default statistics count
+               public int                  statsCount    = 10;               
// Default statistics count
+               public boolean              memStats      = false;            
// max memory statistics
                public Explain.ExplainType  explainType   = 
Explain.ExplainType.NONE;  // Whether to print the "Explain" and if so, what 
type
                public DMLScript.RUNTIME_PLATFORM execMode = 
OptimizerUtils.getDefaultExecutionMode();  // Execution mode standalone, MR, 
Spark or a hybrid
                public boolean              gpu           = false;            
// Whether to use the GPU
@@ -146,28 +147,29 @@ public class DMLScript
                @Override
                public String toString() {
                        return "DMLOptions{" +
-                                                       "argVals=" + argVals +
-                                                       ", configFile='" + 
configFile + '\'' +
-                                                       ", clean=" + clean +
-                                                       ", stats=" + stats +
-                                                       ", statsCount=" + 
statsCount +
-                                                       ", explainType=" + 
explainType +
-                                                       ", execMode=" + 
execMode +
-                                                       ", gpu=" + gpu +
-                                                       ", forceGPU=" + 
forceGPU +
-                                                       ", debug=" + debug +
-                                                       ", scriptType=" + 
scriptType +
-                                                       ", filePath='" + 
filePath + '\'' +
-                                                       ", script='" + script + 
'\'' +
-                                                       ", help=" + help +
-                                                       '}';
+                               "argVals=" + argVals +
+                               ", configFile='" + configFile + '\'' +
+                               ", clean=" + clean +
+                               ", stats=" + stats +
+                               ", statsCount=" + statsCount +
+                               ", memStats=" + memStats +
+                               ", explainType=" + explainType +
+                               ", execMode=" + execMode +
+                               ", gpu=" + gpu +
+                               ", forceGPU=" + forceGPU +
+                               ", debug=" + debug +
+                               ", scriptType=" + scriptType +
+                               ", filePath='" + filePath + '\'' +
+                               ", script='" + script + '\'' +
+                               ", help=" + help +
+                               '}';
                }
        }
 
        public static RUNTIME_PLATFORM  rtplatform          = 
DMLOptions.defaultOptions.execMode;    // the execution mode
        public static boolean           STATISTICS          = 
DMLOptions.defaultOptions.stats;       // whether to print statistics
        public static boolean           FINEGRAINED_STATISTICS  = false;        
                     // whether to print fine-grained statistics
-       public static boolean           JMLC_MEMORY_STATISTICS = false;         
                     // whether to gather memory use stats in JMLC
+       public static boolean           JMLC_MEM_STATISTICS = false;            
                     // whether to gather memory use stats in JMLC
        public static int               STATISTICS_COUNT    = 
DMLOptions.defaultOptions.statsCount;  // statistics maximum heavy hitter count
        public static int               STATISTICS_MAX_WRAP_LEN = 30;           
                     // statistics maximum wrap length
        public static boolean           ENABLE_DEBUG_MODE   = 
DMLOptions.defaultOptions.debug;       // debug mode
@@ -315,6 +317,7 @@ public class DMLScript
                                }
                        }
                }
+               dmlOptions.memStats = line.hasOption("mem");
 
                dmlOptions.clean = line.hasOption("clean");
 
@@ -390,9 +393,11 @@ public class DMLScript
                Option cleanOpt = OptionBuilder.withDescription("cleans up all 
SystemML working directories (FS, DFS); all other flags are ignored in this 
mode. \n")
                                                .create("clean");
                Option statsOpt = OptionBuilder.withArgName("count")
-                                               .withDescription("monitors and 
reports caching/recompilation statistics; heavy hitter <count> is 10 unless 
overridden; default off")
+                                               .withDescription("monitors and 
reports summary execution statistics; heavy hitter <count> is 10 unless 
overridden; default off")
                                                .hasOptionalArg()
                                                .create("stats");
+               Option memOpt = OptionBuilder.withDescription("monitors and 
reports max memory consumption in CP; default off")
+                                               .create("mem");
                Option explainOpt = OptionBuilder.withArgName("level")
                                                .withDescription("explains plan 
levels; can be 'hops' / 'runtime'[default] / 'recompile_hops' / 
'recompile_runtime'")
                                                .hasOptionalArg()
@@ -436,6 +441,7 @@ public class DMLScript
                options.addOption(configOpt);
                options.addOption(cleanOpt);
                options.addOption(statsOpt);
+               options.addOption(memOpt);
                options.addOption(explainOpt);
                options.addOption(execOpt);
                options.addOption(gpuOpt);
@@ -465,17 +471,15 @@ public class DMLScript
                {
                        DMLOptions dmlOptions = parseCLArguments(args, options);
 
-                       // String[] scriptArgs = null; //optional script 
arguments
-                       // boolean namedScriptArgs = false;
-
-                       STATISTICS        = dmlOptions.stats;
-                       STATISTICS_COUNT  = dmlOptions.statsCount;
-                       USE_ACCELERATOR   = dmlOptions.gpu;
-                       FORCE_ACCELERATOR = dmlOptions.forceGPU;
-                       EXPLAIN           = dmlOptions.explainType;
-                       ENABLE_DEBUG_MODE = dmlOptions.debug;
-                       SCRIPT_TYPE       = dmlOptions.scriptType;
-                       rtplatform        = dmlOptions.execMode;
+                       STATISTICS          = dmlOptions.stats;
+                       STATISTICS_COUNT    = dmlOptions.statsCount;
+                       JMLC_MEM_STATISTICS = dmlOptions.memStats;
+                       USE_ACCELERATOR     = dmlOptions.gpu;
+                       FORCE_ACCELERATOR   = dmlOptions.forceGPU;
+                       EXPLAIN             = dmlOptions.explainType;
+                       ENABLE_DEBUG_MODE   = dmlOptions.debug;
+                       SCRIPT_TYPE         = dmlOptions.scriptType;
+                       rtplatform          = dmlOptions.execMode;
 
                        String fnameOptConfig = dmlOptions.configFile;
                        boolean isFile = dmlOptions.filePath != null;
@@ -517,35 +521,26 @@ public class DMLScript
                        else {
                                execute(dmlScriptStr, fnameOptConfig, argVals, 
args, SCRIPT_TYPE);
                        }
-
                }
-               catch(AlreadySelectedException e)
-               {
+               catch(AlreadySelectedException e) {
                        System.err.println("Mutually exclusive options were 
selected. " + e.getMessage());
                        HelpFormatter formatter = new HelpFormatter();
                        formatter.printHelp( "systemml", options );
                        return false;
                }
-               catch(org.apache.commons.cli.ParseException e)
-               {
+               catch(org.apache.commons.cli.ParseException e) {
                        System.err.println(e.getMessage());
                        HelpFormatter formatter = new HelpFormatter();
                        formatter.printHelp( "systemml", options );
                }
-               catch (ParseException pe) {
-                       throw pe;
-               }
-               catch (DMLScriptException e) {
-                       //rethrow DMLScriptException to propagate stop call
+               catch (ParseException | DMLScriptException e) {
                        throw e;
                }
-               catch(Exception ex)
-               {
+               catch(Exception ex) {
                        LOG.error("Failed to execute DML script.", ex);
                        throw new DMLException(ex);
                }
-               finally
-               {
+               finally {
                        //reset runtime platform and visualize flag
                        rtplatform = oldrtplatform;
                        EXPLAIN = oldexplain;
@@ -626,20 +621,15 @@ public class DMLScript
        }
 
        
-       private static void setLoggingProperties( Configuration conf )
-       {
+       private static void setLoggingProperties( Configuration conf ) {
                String debug = conf.get("systemml.logging");
-               
                if (debug == null)
                        debug = System.getProperty("systemml.logging");
-               
                if (debug != null){
-                       if (debug.equalsIgnoreCase("debug")){
+                       if (debug.equalsIgnoreCase("debug"))
                                
Logger.getLogger("org.apache.sysml").setLevel((Level) Level.DEBUG);
-                       }
-                       else if (debug.equalsIgnoreCase("trace")){
+                       else if (debug.equalsIgnoreCase("trace"))
                                
Logger.getLogger("org.apache.sysml").setLevel((Level) Level.TRACE);
-                       }
                }
        }
        
@@ -825,7 +815,7 @@ public class DMLScript
                //init caching (incl set active)
                LocalFileUtils.createWorkingDirectory();
                CacheableData.initCaching();
-                                               
+               
                //reset statistics (required if multiple scripts executed in 
one JVM)
                Statistics.resetNoOfExecutedJobs();
                if( STATISTICS )
@@ -871,9 +861,8 @@ public class DMLScript
                                + MRConfigurationNames.DFS_PERMISSIONS_ENABLED 
+ " = " + perm );
 
                //print warning if permission issues possible
-               if( flagDiffUser && ( flagLocalFS || flagSecurity ) )
-               {
-                       LOG.warn("Cannot run map/reduce tasks as user 
'"+userName+"'. Using tasktracker group '"+ttGroupName+"'.");              
+               if( flagDiffUser && ( flagLocalFS || flagSecurity ) ) {
+                       LOG.warn("Cannot run map/reduce tasks as user 
'"+userName+"'. Using tasktracker group '"+ttGroupName+"'.");
                }
        }
        
@@ -895,25 +884,19 @@ public class DMLScript
                //this implementation does not create job specific sub 
directories)
                JobConf job = new 
JobConf(ConfigurationManager.getCachedJobConf());
                if( InfrastructureAnalyzer.isLocalMode(job) ) {
-                       try 
-                       {
-                               LocalFileUtils.deleteFileIfExists( 
DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only) 
-                                                                          
dirSuffix  );        
-                               LocalFileUtils.deleteFileIfExists( 
MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
-                                                              dirSuffix );
-                               MapReduceTool.deleteFileIfExistOnHDFS( 
MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
-                                                                               
                           dirSuffix  );
-                               MapReduceTool.deleteFileIfExistOnHDFS( 
MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
-                                                                               
       dirSuffix  );
+                       try {
+                               LocalFileUtils.deleteFileIfExists( 
DMLConfig.LOCAL_MR_MODE_STAGING_DIR + dirSuffix );
+                               LocalFileUtils.deleteFileIfExists( 
MRJobConfiguration.getLocalWorkingDirPrefix(job) + dirSuffix );
+                               MapReduceTool.deleteFileIfExistOnHDFS( 
MRJobConfiguration.getSystemWorkingDirPrefix(job) + dirSuffix );
+                               MapReduceTool.deleteFileIfExistOnHDFS( 
MRJobConfiguration.getStagingWorkingDirPrefix(job) + dirSuffix );
                        }
-                       catch(Exception ex)
-                       {
+                       catch(Exception ex) {
                                //we give only a warning because those 
directories are written by the mapred deamon 
                                //and hence, execution can still succeed
                                LOG.warn("Unable to cleanup hadoop working 
dirs: "+ex.getMessage());
                        }
-               }                       
-                       
+               }
+               
                //3) cleanup systemml-internal working dirs
                CacheableData.cleanupCacheDir(); //might be local/hdfs
                LocalFileUtils.cleanupWorkingDirectory();
@@ -924,12 +907,10 @@ public class DMLScript
        // private internal helper functionalities
        ////////
 
-       private static void printInvocationInfo(String fnameScript, String 
fnameOptConfig, Map<String,String> argVals)
-       {               
+       private static void printInvocationInfo(String fnameScript, String 
fnameOptConfig, Map<String,String> argVals) {
                LOG.debug("****** args to DML Script ******\n" + "UUID: " + 
getUUID() + "\n" + "SCRIPT PATH: " + fnameScript + "\n" 
-                       + "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: " 
+ DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n"
-                       + "OPTIONAL CONFIG: " + fnameOptConfig + "\n");
-
+                       + "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: " 
+ DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n"
+                       + "OPTIONAL CONFIG: " + fnameOptConfig + "\n");
                if( !argVals.isEmpty() ) {
                        LOG.debug("Script arguments are: \n");
                        for (int i=1; i<= argVals.size(); i++)
@@ -937,27 +918,23 @@ public class DMLScript
                }
        }
        
-       private static void printStartExecInfo(String dmlScriptString)
-       {
+       private static void printStartExecInfo(String dmlScriptString) {
                LOG.info("BEGIN DML run " + getDateTime());
                LOG.debug("DML script: \n" + dmlScriptString);
-               
                if (rtplatform == RUNTIME_PLATFORM.HADOOP || rtplatform == 
RUNTIME_PLATFORM.HYBRID) {
                        String hadoop_home = System.getenv("HADOOP_HOME");
                        LOG.info("HADOOP_HOME: " + hadoop_home);
                }
        }
        
-       private static String getDateTime() 
-       {
+       private static String getDateTime() {
                DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy 
HH:mm:ss");
                Date date = new Date();
                return dateFormat.format(date);
        }
 
        private static void cleanSystemMLWorkspace() {
-               try
-               {
+               try {
                        //read the default config
                        DMLConfig conf = DMLConfig.readConfigurationFile(null);
                        
@@ -974,9 +951,8 @@ public class DMLScript
                        if( localtmp != null )
                                
LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
                }
-               catch(Exception ex)
-               {
+               catch(Exception ex) {
                        throw new DMLException("Failed to run SystemML 
workspace cleanup.", ex);
                }
        }
-}  
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/api/jmlc/Connection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/Connection.java 
b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
index 550b1c6..ea521e5 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
@@ -194,7 +194,7 @@ public class Connection implements Closeable
         */
        public void gatherMemStats(boolean stats) {
                DMLScript.STATISTICS = stats || DMLScript.STATISTICS;
-               DMLScript.JMLC_MEMORY_STATISTICS = stats;
+               DMLScript.JMLC_MEM_STATISTICS = stats;
        }
        
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java 
b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
index dd8d7d2..2081aae 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
@@ -137,7 +137,7 @@ public class LocalVariableMap implements Cloneable
                        if( !dict.containsKey(hash) && e.getValue() instanceof 
CacheableData ) {
                                dict.put(hash, e.getValue());
                                double size = ((CacheableData<?>) 
e.getValue()).getDataSize();
-                               if ((DMLScript.JMLC_MEMORY_STATISTICS) && 
(DMLScript.FINEGRAINED_STATISTICS))
+                               if (DMLScript.JMLC_MEM_STATISTICS && 
DMLScript.FINEGRAINED_STATISTICS)
                                        
Statistics.maintainCPHeavyHittersMem(e.getKey(), size);
                                total += size;
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java 
b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
index b7476ae..72b5051 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
@@ -259,7 +259,7 @@ public class ProgramBlock implements ParseInfo
                                Statistics.maintainCPHeavyHitters(
                                        tmp.getExtendedOpcode(), 
System.nanoTime()-t0);
                        }
-                       if ((DMLScript.JMLC_MEMORY_STATISTICS) && 
(DMLScript.FINEGRAINED_STATISTICS))
+                       if (DMLScript.JMLC_MEM_STATISTICS && 
DMLScript.FINEGRAINED_STATISTICS)
                                ec.getVariables().getPinnedDataSize();
 
                        // optional trace information (instruction and runtime)

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
index 5b1c26b..6ce1cd3 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
@@ -487,6 +487,8 @@ public abstract class CacheableData<T extends CacheBlock> 
extends Data
                if( DMLScript.STATISTICS ){
                        long t1 = System.nanoTime();
                        CacheStatistics.incrementAcquireMTime(t1-t0);
+                       if (DMLScript.JMLC_MEM_STATISTICS)
+                               
Statistics.addCPMemObject(System.identityHashCode(this), getDataSize());
                }
                
                return ret;
@@ -504,9 +506,6 @@ public abstract class CacheableData<T extends CacheBlock> 
extends Data
                
                setDirty(true);
                _isAcquireFromEmpty = false;
-
-               if (DMLScript.JMLC_MEMORY_STATISTICS)
-                       Statistics.addCPMemObject(newData);
                
                //set references to new data
                if (newData == null)
@@ -573,11 +572,6 @@ public abstract class CacheableData<T extends CacheBlock> 
extends Data
                                }
                                _requiresLocalWrite = false;
                        }
-
-                       if ((DMLScript.JMLC_MEMORY_STATISTICS) && (this._data 
!= null)) {
-                               int hash = System.identityHashCode(this._data);
-                               Statistics.removeCPMemObject(hash);
-                       }
                        
                        //create cache
                        createCache();
@@ -608,10 +602,6 @@ public abstract class CacheableData<T extends CacheBlock> 
extends Data
                          ||(_data!=null && !isCachingActive()) )) //additional 
condition for JMLC
                        freeEvictedBlob();
 
-               if ((DMLScript.JMLC_MEMORY_STATISTICS) && (this._data != null)) 
{
-                       int hash = System.identityHashCode(this._data);
-                       Statistics.removeCPMemObject(hash);
-               }
                // clear the in-memory data
                _data = null;
                clearCache();

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index 77598bf..d0d0b08 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -59,6 +59,7 @@ import org.apache.sysml.runtime.matrix.data.OutputInfo;
 import org.apache.sysml.runtime.matrix.data.Pair;
 import org.apache.sysml.runtime.util.MapReduceTool;
 import org.apache.sysml.utils.GPUStatistics;
+import org.apache.sysml.utils.Statistics;
 
 
 public class ExecutionContext {
@@ -600,6 +601,8 @@ public class ExecutionContext {
        }
        
        public void cleanupCacheableData(CacheableData<?> mo) {
+               if (DMLScript.JMLC_MEM_STATISTICS)
+                       
Statistics.removeCPMemObject(System.identityHashCode(mo));
                //early abort w/o scan of symbol table if no cleanup required
                boolean fileExists = (mo.isHDFSFileExists() && mo.getFileName() 
!= null);
                if( !CacheableData.isCachingActive() && !fileExists )

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index 4196ef8..a3c7677 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -1097,6 +1097,9 @@ public class SparkExecutionContext extends 
ExecutionContext
                //and hence is transparently used by rmvar instructions and 
other users. The
                //core difference is the lineage-based cleanup of RDD and 
broadcast variables.
 
+               if (DMLScript.JMLC_MEM_STATISTICS)
+                       
Statistics.removeCPMemObject(System.identityHashCode(mo));
+
                if( !mo.isCleanupEnabled() )
                        return;
                

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java 
b/src/main/java/org/apache/sysml/utils/Statistics.java
index 7c16375..8f0d853 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -22,7 +22,6 @@ package org.apache.sysml.utils;
 import java.lang.management.CompilationMXBean;
 import java.lang.management.GarbageCollectorMXBean;
 import java.lang.management.ManagementFactory;
-import java.lang.ref.SoftReference;
 import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -36,7 +35,6 @@ import java.util.concurrent.atomic.LongAdder;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
-import org.apache.sysml.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.instructions.Instruction;
@@ -81,12 +79,6 @@ public class Statistics
        private static final ConcurrentHashMap<String,Double> _cpMemObjs = new 
ConcurrentHashMap<>();
        private static final ConcurrentHashMap<Integer,Double> _currCPMemObjs = 
new ConcurrentHashMap<>();
 
-       // this hash map maintains soft references to the cache blocks in 
memory. It is periodically scanned to check for
-       // objects which have been garbage collected. This enables more 
accurate memory statistics. Relying on rmvar
-       // instructions to determine when an object has been de-allocated 
results in a substantial underestimate to memory
-       // use by the program since garbage collection will not occur 
immediately.
-       private static final 
ConcurrentHashMap<Integer,SoftReference<CacheBlock>> _liveObjects = new 
ConcurrentHashMap<>();
-
        //JVM stats (low frequency updates)
        private static long jitCompileTime = 0; //in milli sec
        private static long jvmGCTime = 0; //in milli sec
@@ -601,33 +593,11 @@ public class Statistics
                return opcode;
        }
 
-       public static void addCPMemObject(CacheBlock data) {
-               int hash = System.identityHashCode(data);
-               double sizeof = data.getInMemorySize();
-
+       public static void addCPMemObject(int hash, double sizeof) {
                double sizePrev = _currCPMemObjs.getOrDefault(hash, 0.0);
                _currCPMemObjs.put(hash, sizeof);
                sizeofPinnedObjects.add(sizeof - sizePrev);
-               if (DMLScript.FINEGRAINED_STATISTICS)
-                       _liveObjects.putIfAbsent(hash, new 
SoftReference<>(data));
                maintainMemMaxStats();
-               checkForDeadBlocks();
-       }
-
-       /**
-        * If finegrained statistics are enabled searches through a map of soft 
references to find objects
-        * which have been garbage collected. This results in more accurate 
statistics on memory use but
-        * introduces overhead so is only enabled with finegrained stats and 
when running in JMLC
-        */
-       public static void checkForDeadBlocks() {
-               if (!DMLScript.FINEGRAINED_STATISTICS)
-                       return;
-               for (Entry<Integer,SoftReference<CacheBlock>> e : 
_liveObjects.entrySet()) {
-                       if (e.getValue().get() == null) {
-                               removeCPMemObject(e.getKey());
-                               _liveObjects.remove(e.getKey());
-                       }
-               }
        }
 
        /**
@@ -994,8 +964,8 @@ public class Statistics
                        sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + 
CacheStatistics.displayHits() + ".\n");
                        sb.append("Cache writes (WB, FS, HDFS):\t" + 
CacheStatistics.displayWrites() + ".\n");
                        sb.append("Cache times (ACQr/m, RLS, EXP):\t" + 
CacheStatistics.displayTime() + " sec.\n");
-                       if (DMLScript.JMLC_MEMORY_STATISTICS)
-                               sb.append("Max size of objects in CP memory:\t" 
+ byteCountToDisplaySize(getSizeofPinnedObjects()) + " ("  + 
getNumPinnedObjects() + " total objects)" + "\n");
+                       if (DMLScript.JMLC_MEM_STATISTICS)
+                               sb.append("Max size of live objects:\t" + 
byteCountToDisplaySize(getSizeofPinnedObjects()) + " ("  + 
getNumPinnedObjects() + " total objects)" + "\n");
                        sb.append("HOP DAGs recompiled (PRED, SB):\t" + 
getHopRecompiledPredDAGs() + "/" + getHopRecompiledSBDAGs() + ".\n");
                        sb.append("HOP DAGs recompile time:\t" + 
String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
                        if( getFunRecompiles()>0 ) {
@@ -1047,7 +1017,7 @@ public class Statistics
                        sb.append("Total JVM GC time:\t\t" + 
((double)getJVMgcTime())/1000 + " sec.\n");
                        LibMatrixDNN.appendStatistics(sb);
                        sb.append("Heavy hitter instructions:\n" + 
getHeavyHitters(maxHeavyHitters));
-                       if ((DMLScript.JMLC_MEMORY_STATISTICS) && 
(DMLScript.FINEGRAINED_STATISTICS))
+                       if (DMLScript.JMLC_MEM_STATISTICS && 
DMLScript.FINEGRAINED_STATISTICS)
                                sb.append("Heavy hitter objects:\n" + 
getCPHeavyHittersMem(maxHeavyHitters));
                }
 

Reply via email to