Repository: systemml
Updated Branches:
  refs/heads/master a2bf0006f -> ce240af57
[MINOR] updates to performance scripts

Closes #618

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ce240af5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ce240af5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ce240af5

Branch: refs/heads/master
Commit: ce240af57fb68caa3a978a8bad62701cb55a139d
Parents: a2bf000
Author: Nakul Jindal <[email protected]>
Authored: Wed Aug 16 11:14:43 2017 -0700
Committer: Nakul Jindal <[email protected]>
Committed: Wed Aug 16 11:14:43 2017 -0700

----------------------------------------------------------------------
 bin/systemml-standalone.py              |  10 ++-
 scripts/perftest/python/run_perftest.py |  26 +++----
 scripts/perftest/python/utils_exec.py   |   1 +
 scripts/perftest/python/utils_misc.py   | 104 +++++++++++++++------------
 4 files changed, 82 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/bin/systemml-standalone.py
----------------------------------------------------------------------
diff --git a/bin/systemml-standalone.py b/bin/systemml-standalone.py
index 4000e75..02aefcf 100755
--- a/bin/systemml-standalone.py
+++ b/bin/systemml-standalone.py
@@ -43,7 +43,7 @@ def default_classpath(systemml_home):
 
 #TODO
 # User dir, fix for SYSTEMML_1795
-def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, f):
+def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, heapmem, f):
     """
     This function is responsible for the execution of arguments via
     subprocess call in singlenode mode
@@ -57,7 +57,7 @@ def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu,
     else:
         default_cp = ':'.join(default_classpath(systemml_home))
 
-    java_memory = '-Xmx8g -Xms4g -Xmn1g'
+    java_memory = '-Xmx' + heapmem + ' -Xms4g -Xmn1g'
 
     # Log4j
     log4j = log4j_path(systemml_home)
@@ -93,7 +93,10 @@ def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu,
            '-f', script_file,
            '-exec', 'singlenode',
            '-config', default_config,
            ' '.join(ml_options)]
 
-    return_code = os.system(' '.join(cmd))
+    cmd = ' '.join(cmd)
+    print(cmd)
+
+    return_code = os.system(cmd)
     return return_code
 
@@ -115,6 +118,7 @@ if __name__ == '__main__':
     cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
                                       'set <force> option to skip conservative memory estimates '
                                       'and use GPU wherever possible', nargs='?')
+    cparser.add_argument('-heapmem', help='maximum JVM heap memory', metavar='', default='8g')
     cparser.add_argument('-f', required=True, help='specifies dml/pydml file to execute; '
                                                    'path can be local/hdfs/gpfs', metavar='')
 
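The new -heapmem flag replaces the previously hard-coded -Xmx8g and defaults
to '8g', so existing invocations behave as before. For illustration, an
invocation might look like the following (test.dml is a placeholder script,
not part of this patch):

    # run a DML script in singlenode mode with a 16g maximum JVM heap;
    # this yields java_memory = '-Xmx16g -Xms4g -Xmn1g'
    ./bin/systemml-standalone.py -heapmem 16g -f test.dml
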

http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/scripts/perftest/python/run_perftest.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py
index d430569..8c3d1fa 100755
--- a/scripts/perftest/python/run_perftest.py
+++ b/scripts/perftest/python/run_perftest.py
@@ -32,8 +32,7 @@ from datagen import config_packets_datagen
 from train import config_packets_train
 from predict import config_packets_predict
 from utils_misc import get_families, config_reader, \
-    exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, args_dict_split, \
-    get_config_args
+    exec_dml_and_parse_time, exec_test_data, check_predict, get_folder_metrics, split_config_args
 from utils_fs import create_dir_local, write_success, check_SUCCESS_file_exists
 
 # A packet is a dictionary
@@ -84,8 +83,6 @@ ML_PREDICT = {'Kmeans': 'Kmeans-predict',
 
 DENSE_TYPE_ALGOS = ['clustering', 'stats1', 'stats2']
 
-sup_args_dict = {}
-
 
 # Responsible for execution and metric logging
 def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode, current_dir):
@@ -134,7 +131,7 @@ def algorithm_workflow(algo, exec_type, config_path, dml_file_name, action_mode,
     if exit_flag_success:
         time = 'data_exists'
     else:
-        time = exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict, config_path)
+        time = exec_dml_and_parse_time(exec_type, dml_file_name, args, backend_args_dict, systemml_args_dict, config_path)
         write_success(time, temp_cwd)
 
     print('{},{},{},{},{},{}'.format(algo, action_mode, intercept, mat_type, mat_shape, time))
@@ -222,7 +219,7 @@ def perf_test_entry(family, algo, exec_type, mat_type, mat_shape, config_dir, mo
         # The statistics families do not require a test data split
         if family_name not in ['stats1', 'stats2']:
             if not success_file:
-                exec_test_data(exec_type, spark_args_dict, sup_args_dict, data_gen_dir, config)
+                exec_test_data(exec_type, backend_args_dict, systemml_args_dict, data_gen_dir, config)
 
     if 'train' in mode:
         # Create config directories
@@ -297,7 +294,7 @@ if __name__ == '__main__':
                                      description='SystemML Performance Test Script')
     cparser.add_argument('--family', help='space separated list of classes of algorithms '
                                           '(available : ' + ', '.join(sorted(all_families)) + ')',
-                         metavar='', choices=all_families, nargs='+')
+                         metavar='', choices=all_families, nargs='+', default=' '.join(all_families))
     cparser.add_argument('--algo', help='space separated list of algorithms to run '
                                         '(overrides --family; available : ' + ', '.join(sorted(all_algos)) + ')',
                          metavar='', choices=all_algos, nargs='+')
@@ -329,7 +326,9 @@ if __name__ == '__main__':
     cparser.add_argument('-explain', help='explain plan; levels can be hops, runtime, '
                                           'recompile_hops, recompile_runtime', nargs='?', const='runtime', metavar='')
     cparser.add_argument('-config', help='SystemML configuration file (e.g. SystemML-config.xml)', metavar='')
-
+    cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
+                                      'set <force> option to skip conservative memory estimates '
+                                      'and use GPU wherever possible', nargs='?')
     # Spark Configuration Option
     cparser.add_argument('--master', help='local, yarn-client, yarn-cluster', metavar='')
     cparser.add_argument('--driver-memory', help='Memory for driver (e.g. 512M)', metavar='')
@@ -338,15 +337,18 @@ if __name__ == '__main__':
     cparser.add_argument('--executor-cores', help='Number of cores', metavar='')
     cparser.add_argument('--conf', help='Spark configuration file', nargs='+', metavar='')
 
+    # Single-node execution mode options
+    cparser.add_argument('-heapmem', help='maximum JVM heap memory', metavar='', default='8g')
+
+    # args is an argparse Namespace
     args = cparser.parse_args()
     all_arg_dict = vars(args)
 
-    arg_dict, config_dict, spark_dict = args_dict_split(all_arg_dict)
-
     create_dir_local(args.config_dir)
 
     # Global variables
-    sup_args_dict, spark_args_dict = get_config_args(config_dict, spark_dict, args.exec_type)
+    perftest_args_dict, systemml_args_dict, backend_args_dict = split_config_args(all_arg_dict)
 
     # Debug arguments
     # print(arg_dict)
@@ -395,8 +397,8 @@ if __name__ == '__main__':
     logging.info('algorithm,run_type,intercept,matrix_type,data_shape,time_sec')
 
     # Remove the filename item from the dictionary, as it's already been used to create the log above
-    del arg_dict['filename']
-    perf_test_entry(**arg_dict)
+    del perftest_args_dict['filename']
+    perf_test_entry(**perftest_args_dict)
 
     total_time = (time.time() - start_time)
     logging.info('Performance tests complete {0:.3f} secs \n'.format(total_time))
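Taken together, run_perftest.py now forwards the Spark options to spark-submit
in hybrid_spark mode and -heapmem to systemml-standalone.py in singlenode
mode. For illustration only (the --exec-type spelling is assumed from the
exec_type argument name, which is not shown in this hunk; all values are
made up):

    # hybrid_spark backend: Spark options become spark-submit arguments
    ./scripts/perftest/python/run_perftest.py --family binomial \
        --exec-type hybrid_spark --master yarn-client --driver-memory 8g -stats

    # singlenode backend: -heapmem is passed through to the standalone script
    ./scripts/perftest/python/run_perftest.py --family binomial \
        --exec-type singlenode -heapmem 16g
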

http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/scripts/perftest/python/utils_exec.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_exec.py b/scripts/perftest/python/utils_exec.py
index 87ae3cd..cf98d0f 100755
--- a/scripts/perftest/python/utils_exec.py
+++ b/scripts/perftest/python/utils_exec.py
@@ -52,6 +52,7 @@ def subprocess_exec(cmd_string, log_file_path=None, extract=None):
 
     error_arr, out_arr = get_all_logs(proc1)
     std_outs = out_arr + error_arr
+    std_outs.insert(0, ' '.join(exec_command))
     return_code = proc1.returncode
 
     if log_file_path is not None:
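The one-line change above prepends the executed command to the captured
output, so each log now starts with the exact command that produced it. A
minimal, self-contained sketch of the same pattern (run_and_log and its file
names are hypothetical, not the module's API):

    import subprocess

    def run_and_log(exec_command, log_file_path):
        # run the command, capturing stdout and stderr
        proc = subprocess.Popen(exec_command,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()
        std_outs = out.decode().splitlines() + err.decode().splitlines()
        # prepend the command itself so the log file is self-describing
        std_outs.insert(0, ' '.join(exec_command))
        with open(log_file_path, 'w') as log:
            log.write('\n'.join(std_outs))
        return proc.returncode

    # usage: run_and_log(['echo', 'hello'], '/tmp/echo.log')
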

http://git-wip-us.apache.org/repos/asf/systemml/blob/ce240af5/scripts/perftest/python/utils_misc.py
----------------------------------------------------------------------
diff --git a/scripts/perftest/python/utils_misc.py b/scripts/perftest/python/utils_misc.py
index e247ce8..a3c98c2 100755
--- a/scripts/perftest/python/utils_misc.py
+++ b/scripts/perftest/python/utils_misc.py
@@ -30,53 +30,68 @@ from utils_exec import subprocess_exec
 # This file contains all misc utility functions required by the performance test module
 
 
-def get_config_args(config_dict, spark_dict, exec_type):
+def split_config_args(args):
     """
     Based on the configuration parameters passed, build the configuration dictionaries used by the subprocess
 
-    config_dict: Dictionary
-        General configuration options
+    args: Dictionary
+        All parameters passed in
 
-    spark_dict: Dictionary
-        Spark configuration options
+    return: Dictionary, Dictionary, Dictionary
+        3 dictionaries - one for the perf tests, one for SystemML-specific args, one for backend options
+    """
+
+    perftest_args_dict = {}
+
+    perftest_args_dict['family'] = args['family']
+    perftest_args_dict['algo'] = args['algo']
+    perftest_args_dict['exec_type'] = args['exec_type']
+    perftest_args_dict['mat_type'] = args['mat_type']
+    perftest_args_dict['mat_shape'] = args['mat_shape']
+    perftest_args_dict['config_dir'] = args['config_dir']
+    perftest_args_dict['filename'] = args['filename']
+    perftest_args_dict['mode'] = args['mode']
+    perftest_args_dict['temp_dir'] = args['temp_dir']
 
-    exec_type: String
-        Contains the execution type singlenode / hybrid_spark
-
-    return: Dictionary, Dictionary
-        Based on the parameters passed we build to dictionary that need to be passed either at the
-        beginning or at the end
-    """
-    sup_args_dict = {}
+    systemml_args_dict = {}
 
-    if config_dict['stats'] is not None:
-        sup_args_dict['-stats'] = config_dict['stats']
+    if args['stats'] is not None:
+        systemml_args_dict['-stats'] = args['stats']
 
-    if config_dict['explain'] is not None:
-        sup_args_dict['-explain'] = config_dict['explain']
+    if args['explain'] is not None:
+        systemml_args_dict['-explain'] = args['explain']
 
-    if config_dict['config'] is not None:
-        sup_args_dict['-config'] = config_dict['config']
+    if args['config'] is not None:
+        systemml_args_dict['-config'] = args['config']
 
-    spark_args_dict = {}
+    if args['gpu'] is not None:
+        systemml_args_dict['-gpu'] = args['gpu']
+
+    backend_args_dict = {}
+    exec_type = args['exec_type']
+
     if exec_type == 'hybrid_spark':
-        if spark_dict['master'] is not None:
-            spark_args_dict['--master'] = spark_dict['master']
+        if args['master'] is not None:
+            backend_args_dict['--master'] = args['master']
 
-        if spark_dict['num_executors'] is not None:
-            spark_args_dict['--num-executors'] = spark_dict['num_executors']
+        if args['num_executors'] is not None:
+            backend_args_dict['--num-executors'] = args['num_executors']
 
-        if spark_dict['driver_memory'] is not None:
-            spark_args_dict['--driver-memory'] = spark_dict['driver_memory']
+        if args['driver_memory'] is not None:
+            backend_args_dict['--driver-memory'] = args['driver_memory']
 
-        if spark_dict['executor_cores'] is not None:
-            spark_args_dict['--executor-cores'] = spark_dict['executor_cores']
+        if args['executor_cores'] is not None:
+            backend_args_dict['--executor-cores'] = args['executor_cores']
 
-        if spark_dict['conf'] is not None:
-            spark_args_dict['--conf'] = ' '.join(spark_dict['conf'])
+        if args['conf'] is not None:
+            backend_args_dict['--conf'] = ' '.join(args['conf'])
+    elif exec_type == 'singlenode':
+        if args['heapmem'] is not None:
+            backend_args_dict['-heapmem'] = args['heapmem']
 
-    return sup_args_dict, spark_args_dict
+    return perftest_args_dict, systemml_args_dict, backend_args_dict
 
 
 def args_dict_split(all_arguments):
@@ -87,13 +102,14 @@ def args_dict_split(all_arguments):
         All input arguments parsed
 
     return: Dictionary, Dictionary, Dictionary, Dictionary
-        We return three dictionaries for init, script, spark arguments
+        We return four dictionaries for init, script, spark, and singlenode arguments
     """
     args_dict = dict(list(all_arguments.items())[0:9])
-    config_dict = dict(list(all_arguments.items())[9:12])
-    spark_dict = dict(list(all_arguments.items())[12:])
+    config_dict = dict(list(all_arguments.items())[9:13])
+    spark_dict = dict(list(all_arguments.items())[13:19])
+    singlenode_dict = dict(list(all_arguments.items())[19:])
 
-    return args_dict, config_dict, spark_dict
+    return args_dict, config_dict, spark_dict, singlenode_dict
 
 
 def get_families(current_algo, ml_algo):
@@ -166,7 +182,7 @@ def config_reader(read_path):
 
     return conf_file
 
 
-def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup_args_dict, log_file_name=None):
+def exec_dml_and_parse_time(exec_type, dml_file_name, args, backend_args_dict, systemml_args_dict, log_file_name=None):
     """
     This function is responsible for executing the input arguments via a Python subprocess;
     we also extract the time taken from the output of this subprocess
 
@@ -180,10 +196,10 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup
     args: Dictionary
         Key-value pairs depending on the arg type
 
-    spark_args_dict: Dictionary
-        Spark configuration arguments
+    backend_args_dict: Dictionary
+        Spark configuration arguments / singlenode config arguments
 
-    sup_args_dict: Dictionary
+    systemml_args_dict: Dictionary
         Supplementary arguments required by the script
 
     log_file_name: String
@@ -195,17 +211,17 @@ def exec_dml_and_parse_time(exec_type, dml_file_name, args, spark_args_dict, sup
 
     algorithm = dml_file_name + '.dml'
 
-    sup_args = ''.join(['{} {}'.format(k, v) for k, v in sup_args_dict.items()])
+    sup_args = ''.join(['{} {}'.format(k, v) for k, v in systemml_args_dict.items()])
     if exec_type == 'singlenode':
         exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 'systemml-standalone.py')
-
+        singlenode_pre_args = ''.join([' {} {} '.format(k, v) for k, v in backend_args_dict.items()])
         args = ''.join(['{} {}'.format(k, v) for k, v in args.items()])
-        cmd = [exec_script, '-f', algorithm, args, sup_args]
+        cmd = [exec_script, singlenode_pre_args, '-f', algorithm, args, sup_args]
         cmd_string = ' '.join(cmd)
 
     if exec_type == 'hybrid_spark':
         exec_script = join(os.environ.get('SYSTEMML_HOME'), 'bin', 'systemml-spark-submit.py')
-        spark_pre_args = ''.join([' {} {} '.format(k, v) for k, v in spark_args_dict.items()])
+        spark_pre_args = ''.join([' {} {} '.format(k, v) for k, v in backend_args_dict.items()])
         args = ''.join(['{} {}'.format(k, v) for k, v in args.items()])
         cmd = [exec_script, spark_pre_args, '-f', algorithm, args, sup_args]
         cmd_string = ' '.join(cmd)
@@ -238,7 +254,7 @@ def parse_time(raw_logs):
 
     return 'time_not_found'
 
 
-def exec_test_data(exec_type, spark_args_dict, sup_args_dict, datagen_path, config):
+def exec_test_data(exec_type, backend_args_dict, systemml_args_dict, datagen_path, config):
     """
     Creates the test data split from the given input path
 
@@ -256,7 +272,7 @@ def exec_test_data(exec_type, spark_args_dict, sup_args_dict, datagen_path, conf
     X_test = join(path, 'X_test.data')
     Y_test = join(path, 'Y_test.data')
     args = {'-args': ' '.join([X, Y, X_test, Y_test, 'csv'])}
-    exec_dml_and_parse_time(exec_type, test_split_script, args, spark_args_dict, sup_args_dict)
+    exec_dml_and_parse_time(exec_type, test_split_script, args, backend_args_dict, systemml_args_dict)
 
 
 def check_predict(current_algo, ml_predict):
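
To make the new three-way split concrete, an illustrative call to
split_config_args (all values are made up; the keys mirror the parser options
above):

    from utils_misc import split_config_args

    # hypothetical output of vars(cparser.parse_args()) in run_perftest.py
    all_arg_dict = {
        'family': ['binomial'], 'algo': None, 'exec_type': 'hybrid_spark',
        'mat_type': ['dense'], 'mat_shape': ['10k_100'], 'config_dir': 'temp',
        'filename': 'perf_test', 'mode': ['train'], 'temp_dir': 'temp',
        'stats': '10', 'explain': None, 'config': None, 'gpu': None,
        'master': 'yarn-client', 'driver_memory': '8g', 'num_executors': None,
        'executor_cores': None, 'conf': None, 'heapmem': '8g'}

    perftest, systemml, backend = split_config_args(all_arg_dict)
    # perftest -> the nine perf-test options (family, algo, ..., temp_dir)
    # systemml -> {'-stats': '10'}
    # backend  -> {'--master': 'yarn-client', '--driver-memory': '8g'}
    #             (with exec_type 'singlenode', backend would hold '-heapmem')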
