This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new d6b7d1b  [MINOR] Fix minor issues perftest suite (datagen, spark summit)
d6b7d1b is described below

commit d6b7d1b82e18504e6c4463b332efa3e9c5ddf1f9
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Oct 9 16:25:03 2021 +0200

    [MINOR] Fix minor issues perftest suite (datagen, spark summit)
---
 scripts/perftest/conf/log4j.properties             | 40 ++++++++++++++++++++++
 scripts/perftest/genBinomialData.sh                | 39 ++++++++++-----------
 scripts/perftest/genMultinomialData.sh             | 40 +++++++++++-----------
 scripts/perftest/runAll.sh                         |  5 ++-
 scripts/perftest/scripts/extractTestData.dml       | 11 ++----
 .../{scripts/extractTestData.dml => sparkDML.sh}   | 29 +++++++++++-----
 6 files changed, 104 insertions(+), 60 deletions(-)

diff --git a/scripts/perftest/conf/log4j.properties b/scripts/perftest/conf/log4j.properties
new file mode 100644
index 0000000..fbfd465
--- /dev/null
+++ b/scripts/perftest/conf/log4j.properties
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=ERROR, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark_project.jetty=WARN
+log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.parquet=ERROR
+log4j.logger.parquet=ERROR
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
diff --git a/scripts/perftest/genBinomialData.sh b/scripts/perftest/genBinomialData.sh
index 1352fc0..8fda720 100755
--- a/scripts/perftest/genBinomialData.sh
+++ b/scripts/perftest/genBinomialData.sh
@@ -27,33 +27,32 @@ FORMAT="binary" # can be csv, mm, text, binary
 DENSE_SP=0.9
 SPARSE_SP=0.01
 
-
 #generate XS scenarios (80MB)
-${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 
${BASE}/w10k_1k_dense ${BASE}/X10k_1k_dense ${BASE}/y10k_1k_dense 1 0 $DENSE_SP 
$FORMAT 1
-${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 
${BASE}/w10k_1k_sparse ${BASE}/X10k_1k_sparse ${BASE}/y10k_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 
${BASE}/w10k_1k_dense ${BASE}/X10k_1k_dense ${BASE}/y10k_1k_dense 1 0 $DENSE_SP 
$FORMAT 1
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 10000 1000 5 5 
${BASE}/w10k_1k_sparse ${BASE}/X10k_1k_sparse ${BASE}/y10k_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
 ${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10k_1k_dense 
${BASE}/y10k_1k_dense ${BASE}/X10k_1k_dense_test ${BASE}/y10k_1k_dense_test 
$FORMAT
 ${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10k_1k_sparse 
${BASE}/y10k_1k_sparse ${BASE}/X10k_1k_sparse_test ${BASE}/y10k_1k_sparse_test 
$FORMAT
 
 ##generate S scenarios (800MB)
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 100000 1000 5 
5 ${BASE}/w100k_1k_dense ${BASE}/X100k_1k_dense ${BASE}/y100k_1k_dense 1 0 
$DENSE_SP $FORMAT 1
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 100000 1000 5 
5 ${BASE}/w100k_1k_sparse ${BASE}/X100k_1k_sparse ${BASE}/y100k_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100k_1k_dense 
${BASE}/y100k_1k_dense ${BASE}/X100k_1k_dense_test ${BASE}/y100k_1k_dense_test 
$FORMAT
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100k_1k_sparse 
${BASE}/y100k_1k_sparse ${BASE}/X100k_1k_sparse_test 
${BASE}/y100k_1k_sparse_test $FORMAT
-#
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 100000 1000 5 5 
${BASE}/w100k_1k_dense ${BASE}/X100k_1k_dense ${BASE}/y100k_1k_dense 1 0 
$DENSE_SP $FORMAT 1
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 100000 1000 5 5 
${BASE}/w100k_1k_sparse ${BASE}/X100k_1k_sparse ${BASE}/y100k_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100k_1k_dense 
${BASE}/y100k_1k_dense ${BASE}/X100k_1k_dense_test ${BASE}/y100k_1k_dense_test 
$FORMAT
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100k_1k_sparse 
${BASE}/y100k_1k_sparse ${BASE}/X100k_1k_sparse_test 
${BASE}/y100k_1k_sparse_test $FORMAT
+
 ##generate M scenarios (8GB)
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 1000000 1000 5 
5 ${BASE}/w1M_1k_dense ${BASE}/X1M_1k_dense ${BASE}/y1M_1k_dense 1 0 $DENSE_SP 
$FORMAT 1
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 1000000 1000 5 
5 ${BASE}/w1M_1k_sparse ${BASE}/X1M_1k_sparse ${BASE}/y1M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X1M_1k_dense 
${BASE}/y1M_1k_dense ${BASE}/X1M_1k_dense_test ${BASE}/y1M_1k_dense_test $FORMAT
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X1M_1k_sparse 
${BASE}/y1M_1k_sparse ${BASE}/X1M_1k_sparse_test ${BASE}/y1M_1k_sparse_test 
$FORMAT
-#
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 1000000 1000 5 5 
${BASE}/w1M_1k_dense ${BASE}/X1M_1k_dense ${BASE}/y1M_1k_dense 1 0 $DENSE_SP 
$FORMAT 1
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 1000000 1000 5 5 
${BASE}/w1M_1k_sparse ${BASE}/X1M_1k_sparse ${BASE}/y1M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X1M_1k_dense 
${BASE}/y1M_1k_dense ${BASE}/X1M_1k_dense_test ${BASE}/y1M_1k_dense_test $FORMAT
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X1M_1k_sparse 
${BASE}/y1M_1k_sparse ${BASE}/X1M_1k_sparse_test ${BASE}/y1M_1k_sparse_test 
$FORMAT
+
 ##generate L scenarios (80GB)
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000000 1000 
5 5 ${BASE}/w10M_1k_dense ${BASE}/X10M_1k_dense ${BASE}/y10M_1k_dense 1 0 
$DENSE_SP $FORMAT 1
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 10000000 1000 
5 5 ${BASE}/w10M_1k_sparse ${BASE}/X10M_1k_sparse ${BASE}/y10M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10M_1k_dense 
${BASE}/y10M_1k_dense ${BASE}/X10M_1k_dense_test ${BASE}/y10M_1k_dense_test 
$FORMAT
-#${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10M_1k_sparse 
${BASE}/y10M_1k_sparse ${BASE}/X10M_1k_sparse_test ${BASE}/y10M_1k_sparse_test 
$FORMAT
-#
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 10000000 1000 5 
5 ${BASE}/w10M_1k_dense ${BASE}/X10M_1k_dense ${BASE}/y10M_1k_dense 1 0 
$DENSE_SP $FORMAT 1
+${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 10000000 1000 5 
5 ${BASE}/w10M_1k_sparse ${BASE}/X10M_1k_sparse ${BASE}/y10M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10M_1k_dense 
${BASE}/y10M_1k_dense ${BASE}/X10M_1k_dense_test ${BASE}/y10M_1k_dense_test 
$FORMAT
+${CMD} -f scripts/extractTestData.dml --args ${BASE}/X10M_1k_sparse 
${BASE}/y10M_1k_sparse ${BASE}/X10M_1k_sparse_test ${BASE}/y10M_1k_sparse_test 
$FORMAT
+
 ##generate XL scenarios (800GB)
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 100000000 1000 
5 5 ${BASE}/w100M_1k_dense ${BASE}/X100M_1k_dense ${BASE}/y100M_1k_dense 1 0 
$DENSE_SP $FORMAT 1
-#${CMD} -f ../datagen/genRandData4LogisticRegression.dml --args 100000000 1000 
5 5 ${BASE}/w100M_1k_sparse ${BASE}/X100M_1k_sparse ${BASE}/y100M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
+#${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 100000000 1000 
5 5 ${BASE}/w100M_1k_dense ${BASE}/X100M_1k_dense ${BASE}/y100M_1k_dense 1 0 
$DENSE_SP $FORMAT 1
+#${CMD} -f ./datagen/genRandData4LogisticRegression.dml --args 100000000 1000 
5 5 ${BASE}/w100M_1k_sparse ${BASE}/X100M_1k_sparse ${BASE}/y100M_1k_sparse 1 0 
$SPARSE_SP $FORMAT 1
 #${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100M_1k_dense 
${BASE}/y100M_1k_dense ${BASE}/X100M_1k_dense_test ${BASE}/y100M_1k_dense_test 
$FORMAT
 #${CMD} -f scripts/extractTestData.dml --args ${BASE}/X100M_1k_sparse 
${BASE}/y100M_1k_sparse ${BASE}/X100M_1k_sparse_test 
${BASE}/y100M_1k_sparse_test $FORMAT
\ No newline at end of file
diff --git a/scripts/perftest/genMultinomialData.sh b/scripts/perftest/genMultinomialData.sh
index c9020a3..7ea6cad 100755
--- a/scripts/perftest/genMultinomialData.sh
+++ b/scripts/perftest/genMultinomialData.sh
@@ -28,31 +28,31 @@ DENSE_SP=0.9
 SPARSE_SP=0.01
 
 #generate XS scenarios (80MB)
-${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 
$DENSE_SP 5 0 $BASE/X10k_1k_dense_k5 $BASE/y10k_1k_dense_k5 $FORMAT 1
-${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 
$SPARSE_SP 5 0 $BASE/X10k_1k_sparse_k5 $BASE/y10k_1k_sparse_k5 $FORMAT 1
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 
$DENSE_SP 5 0 $BASE/X10k_1k_dense_k5 $BASE/y10k_1k_dense_k5 $FORMAT 1
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 10000 1000 
$SPARSE_SP 5 0 $BASE/X10k_1k_sparse_k5 $BASE/y10k_1k_sparse_k5 $FORMAT 1
 ${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10k_1k_dense_k5 
$BASE/y10k_1k_dense_k5 $BASE/X10k_1k_dense_k5_test $BASE/y10k_1k_dense_k5_test 
$FORMAT
 ${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10k_1k_sparse_k5 
$BASE/y10k_1k_sparse_k5 $BASE/X10k_1k_sparse_k5_test 
$BASE/y10k_1k_sparse_k5_test $FORMAT
 
-##generate S scenarios (80MB)
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 100000 1000 
$DENSE_SP 5 0 $BASE/X100k_1k_dense_k5 $BASE/y100k_1k_dense_k5 $FORMAT 1
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 100000 1000 
$SPARSE_SP 5 0 $BASE/X100k_1k_sparse_k5 $BASE/y100k_1k_sparse_k5 $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100k_1k_dense_k5 
$BASE/y100k_1k_dense_k5 $BASE/X100k_1k_dense_k5_test 
$BASE/y100k_1k_dense_k5_test $FORMAT
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100k_1k_sparse_k5 
$BASE/y100k_1k_sparse_k5 $BASE/X100k_1k_sparse_k5_test 
$BASE/y100k_1k_sparse_k5_test $FORMAT
-#
+##generate S scenarios (800MB)
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 100000 1000 
$DENSE_SP 5 0 $BASE/X100k_1k_dense_k5 $BASE/y100k_1k_dense_k5 $FORMAT 1
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 100000 1000 
$SPARSE_SP 5 0 $BASE/X100k_1k_sparse_k5 $BASE/y100k_1k_sparse_k5 $FORMAT 1
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100k_1k_dense_k5 
$BASE/y100k_1k_dense_k5 $BASE/X100k_1k_dense_k5_test 
$BASE/y100k_1k_dense_k5_test $FORMAT
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100k_1k_sparse_k5 
$BASE/y100k_1k_sparse_k5 $BASE/X100k_1k_sparse_k5_test 
$BASE/y100k_1k_sparse_k5_test $FORMAT
+
 ##generate M scenarios (8GB)
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 1000000 1000 
$DENSE_SP 5 0 $BASE/X1M_1k_dense_k5 $BASE/y1M_1k_dense_k5 $FORMAT 1
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 1000000 1000 
$SPARSE_SP 5 0 $BASE/X1M_1k_sparse_k5 $BASE/y1M_1k_sparse_k5 $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X1M_1k_dense_k5 
$BASE/y1M_1k_dense_k5 $BASE/X1M_1k_dense_k5_test $BASE/y1M_1k_dense_k5_test 
$FORMAT
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X1M_1k_sparse_k5 
$BASE/y1M_1k_sparse_k5 $BASE/X1M_1k_sparse_k5_test $BASE/y1M_1k_sparse_k5_test 
$FORMAT
-#
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 1000000 1000 
$DENSE_SP 5 0 $BASE/X1M_1k_dense_k5 $BASE/y1M_1k_dense_k5 $FORMAT 1
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 1000000 1000 
$SPARSE_SP 5 0 $BASE/X1M_1k_sparse_k5 $BASE/y1M_1k_sparse_k5 $FORMAT 1
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X1M_1k_dense_k5 
$BASE/y1M_1k_dense_k5 $BASE/X1M_1k_dense_k5_test $BASE/y1M_1k_dense_k5_test 
$FORMAT
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X1M_1k_sparse_k5 
$BASE/y1M_1k_sparse_k5 $BASE/X1M_1k_sparse_k5_test $BASE/y1M_1k_sparse_k5_test 
$FORMAT
+
 ##generate L scenarios (80GB)
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000000 1000 
$DENSE_SP 5 0 $BASE/X10M_1k_dense_k5 $BASE/y10M_1k_dense_k5 $FORMAT 1
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 10000000 1000 
$SPARSE_SP 5 0 $BASE/X10M_1k_sparse_k5 $BASE/y10M_1k_sparse_k5 $FORMAT 1
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10M_1k_dense_k5 
$BASE/y10M_1k_dense_k5 $BASE/X10M_1k_dense_k5_test $BASE/y10M_1k_dense_k5_test 
$FORMAT
-#${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10M_1k_sparse_k5 
$BASE/y10M_1k_sparse_k5 $BASE/X10M_1k_sparse_k5_test 
$BASE/y10M_1k_sparse_k5_test $FORMAT
-#
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 10000000 1000 
$DENSE_SP 5 0 $BASE/X10M_1k_dense_k5 $BASE/y10M_1k_dense_k5 $FORMAT 1
+${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 10000000 1000 
$SPARSE_SP 5 0 $BASE/X10M_1k_sparse_k5 $BASE/y10M_1k_sparse_k5 $FORMAT 1
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10M_1k_dense_k5 
$BASE/y10M_1k_dense_k5 $BASE/X10M_1k_dense_k5_test $BASE/y10M_1k_dense_k5_test 
$FORMAT
+${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X10M_1k_sparse_k5 
$BASE/y10M_1k_sparse_k5 $BASE/X10M_1k_sparse_k5_test 
$BASE/y10M_1k_sparse_k5_test $FORMAT
+
 ##generate LARGE scenarios (800GB)
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 100000000 1000 
$DENSE_SP 5 0 $BASE/X100M_1k_dense_k5 $BASE/y100M_1k_dense_k5 $FORMAT 1
-#${CMD} -f ../datagen/genRandData4Multinomial.dml $DASH-args 100000000 1000 
$SPARSE_SP 5 0 $BASE/X100M_1k_sparse_k5 $BASE/y100M_1k_sparse_k5 $FORMAT 1
+#${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 100000000 1000 
$DENSE_SP 5 0 $BASE/X100M_1k_dense_k5 $BASE/y100M_1k_dense_k5 $FORMAT 1
+#${CMD} -f ./datagen/genRandData4Multinomial.dml $DASH-args 100000000 1000 
$SPARSE_SP 5 0 $BASE/X100M_1k_sparse_k5 $BASE/y100M_1k_sparse_k5 $FORMAT 1
 #${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100M_1k_dense_k5 
$BASE/y100M_1k_dense_k5 $BASE/X100M_1k_dense_k5_test 
$BASE/y100M_1k_dense_k5_test $FORMAT
 #${CMD} -f scripts/extractTestData.dml $DASH-args $BASE/X100M_1k_sparse_k5 
$BASE/y100M_1k_sparse_k5 $BASE/X100M_1k_sparse_k5_test 
$BASE/y100M_1k_sparse_k5_test $FORMAT
diff --git a/scripts/perftest/runAll.sh b/scripts/perftest/runAll.sh
index a2f0989..3206e66 100755
--- a/scripts/perftest/runAll.sh
+++ b/scripts/perftest/runAll.sh
@@ -29,8 +29,8 @@ export LOG4JPROP='conf/log4j-off.properties'
 export SYSDS_QUIET=1
 
 # Command to be executed
-CMD="systemds"
-#CMD="./sparkDML.sh"
+#CMD="systemds"
+CMD="./sparkDML.sh"
 
 # Possible lines to initialize Intel MKL, depending on version and install location
 #    . ~/intel/bin/compilervars.sh intel64
@@ -68,4 +68,3 @@ date >> results/times.txt
 #./runAllSurvival $CMD $TEMPFOLDER
 #KaplanMeier
 #Cox
-
diff --git a/scripts/perftest/scripts/extractTestData.dml b/scripts/perftest/scripts/extractTestData.dml
index 49d465f..702ddbd 100755
--- a/scripts/perftest/scripts/extractTestData.dml
+++ b/scripts/perftest/scripts/extractTestData.dml
@@ -22,12 +22,7 @@
 X = read($1);
 y = read($2);
 
-Percent_test = .2
-N_x = as.integer(nrow(X) * Percent_test);
-N_y = as.integer(nrow(y) * Percent_test);
+[X,y,Xtest,ytest] = split(X=X, Y=y, f=0.8);
 
-X = X[1:N_x,];
-y = y[1:N_y,];
-
-write(X, $3, format=$5);
-write(y, $4, format=$5);
+write(Xtest, $3, format=$5);
+write(ytest, $4, format=$5);
diff --git a/scripts/perftest/scripts/extractTestData.dml b/scripts/perftest/sparkDML.sh
old mode 100755
new mode 100644
similarity index 54%
copy from scripts/perftest/scripts/extractTestData.dml
copy to scripts/perftest/sparkDML.sh
index 49d465f..370e70c
--- a/scripts/perftest/scripts/extractTestData.dml
+++ b/scripts/perftest/sparkDML.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
@@ -19,15 +20,25 @@
 #
 #-------------------------------------------------------------
 
-X = read($1);
-y = read($2);
+#set -x
 
-Percent_test = .2
-N_x = as.integer(nrow(X) * Percent_test);
-N_y = as.integer(nrow(y) * Percent_test);
 
-X = X[1:N_x,];
-y = y[1:N_y,];
+# This script is a simplified version of sparkDML.sh in order to
+# allow a simple drop-in replacement for 'hadoop jar' without
+# the need to change any command line arguments. 
 
-write(X, $3, format=$5);
-write(y, $4, format=$5);
+export SPARK_HOME=../spark-2.4.7-bin-hadoop2.7
+export HADOOP_CONF_DIR=/home/hadoop/hadoop-2.7.7/etc/hadoop
+
+$SPARK_HOME/bin/spark-submit \
+     --master yarn \
+     --deploy-mode client \
+     --driver-memory 20g \
+     --conf spark.driver.extraJavaOptions="-Xms20g -Dlog4j.configuration=file:/home/mboehm/perftest/conf/log4j.properties" \
+     --conf spark.ui.showConsoleProgress=true \
+     --conf spark.executor.heartbeatInterval=100s \
+     --conf spark.network.timeout=512s \
+     --num-executors 10 \
+     --executor-memory 105g \
+     --executor-cores 32 \
+     SystemDS.jar "$@" 
\ No newline at end of file

Reply via email to