This is an automated email from the ASF dual-hosted git repository.
markd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new edd3a8d [SYSTEMDS-38] Runscript rename, improve, fix cygwin
edd3a8d is described below
commit edd3a8d6804da648b925993ea7d7742df30beb10
Author: Mark Dokter <[email protected]>
AuthorDate: Fri May 8 20:34:53 2020 +0200
[SYSTEMDS-38] Runscript rename, improve, fix cygwin
* The run script is now called systemds (formerly systemds.sh)
* For consistency, '-f' may now be used in front of the DML script file
* The script now works in Cygwin on Windows (make sure the script stays in
  Unix LF format; Git for Windows usually checks out files with CRLF line
  endings)
* Cleaned up the messy handling of absolute and relative paths (now using
  the 'realpath' executable from the GNU coreutils package, which should be
  part of any standard bash installation)
* Fixed some directory separator issues, shellcheck warnings, and the
  native libs path
* Replaced all occurrences of systemds.sh in the codebase
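Example invocations of the renamed script, adapted from the updated READMEs
(a minimal sketch, assuming the commands run from the repository root with a
hello.dml as created in bin/README.md):
```shell script
# create a hello world script
echo 'print("HelloWorld!")' > hello.dml
# run a DML script locally (formerly: systemds.sh hello.dml)
bin/systemds hello.dml
# the '-f' flag may now optionally precede the script file
bin/systemds -f hello.dml
# start a federated worker on port 8001
bin/systemds WORKER 8001
# run via spark-submit instead of local java
SYSDS_DISTRIBUTED=1 bin/systemds hello.dml
```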
---
bin/README.md | 4 +-
bin/{systemds.sh => systemds} | 173 ++++++++++-----------
dev/release/simple-release-verify.sh | 4 +-
docker/sysds.Dockerfile | 2 +-
docs/Tasks.txt | 1 +
src/assembly/bin/README.md | 8 +-
src/main/python/docs/source/federated.rst | 8 +-
src/main/python/tests/federated/runFedTest.sh | 4 +-
src/main/python/tests/lineage/test_lineagetrace.py | 2 +-
9 files changed, 98 insertions(+), 108 deletions(-)
diff --git a/bin/README.md b/bin/README.md
index 2ed2c4b..406c06b 100644
--- a/bin/README.md
+++ b/bin/README.md
@@ -49,7 +49,7 @@ first open an terminal and go to an empty folder, then execute the following.
# Create a hello World script
echo 'print("HelloWorld!")' > hello.dml
# Execute hello world Script
-systemds.sh hello.dml
+systemds hello.dml
# Remove the hello.dml
rm hello.dml
```
@@ -79,7 +79,7 @@ echo '{"rows": 1, "cols": 4, "format": "csv"}' > data/types.csv.mtd
### Executing the DML script
```shell script
-bin/systemds.sh Univar-Stats.dml -nvargs X=data/haberman.data TYPES=data/types.csv STATS=data/univarOut.mtx CONSOLE_OUTPUT=TRUE
+bin/systemds Univar-Stats.dml -nvargs X=data/haberman.data TYPES=data/types.csv STATS=data/univarOut.mtx CONSOLE_OUTPUT=TRUE
```
## Using Intel MKL native instructions
diff --git a/bin/systemds.sh b/bin/systemds
similarity index 71%
rename from bin/systemds.sh
rename to bin/systemds
index 80e9721..9590f14 100755
--- a/bin/systemds.sh
+++ b/bin/systemds
@@ -29,7 +29,8 @@
# Make configuration changes here:
##############################################################
-# if not set by env, set to 1 to run spark-submit instead of local java
+# If not set by env, set to 1 to run spark-submit instead of local java
+# This should be used if "-exec SPARK" or "-exec HYBRID" is required
if [ -z "$SYSDS_DISTRIBUTED" ]; then
SYSDS_DISTRIBUTED=0
fi
@@ -76,7 +77,7 @@ fi
printUsageExit()
{
cat << EOF
-Usage: $0 [SystemDS.jar] <dml-filename> [arguments] [-help]
+Usage: $0 [SystemDS.jar] [-f] <dml-filename> [arguments] [-help]
SystemDS.jar - Specify a custom SystemDS.jar file (this will be prepended
to the classpath
or fed to spark-submit
@@ -104,7 +105,7 @@ if [ -z "$1" ] ; then
printUsageExit;
fi
-while getopts "h:" options; do
+while getopts "h:f:" options; do
case $options in
h ) echo Warning: Help requested. Will exit after usage message;
printUsageExit
@@ -112,25 +113,15 @@ while getopts "h:" options; do
\? ) echo Warning: Help requested. Will exit after usage message;
printUsageExit
;;
+ f )
+ # silently remove -f (this variant is triggered if there's no
+ # jar file or WORKER as first parameter)
+ shift
+ ;;
* ) echo Error: Unexpected error while processing options;
esac
done
-# convert directory delimiter from *nix to windows
-win_delim()
-{
- declare -n ret=$1
- OLDIFS=$IFS
- IFS="/"
- new_path=""
- for i in ${ret}; do
- new_path="${new_path}$i\\"
- done
- IFS=$OLDIFS
-
- ret=${new_path:0:-1}
-}
-
# an echo toggle
print_out()
{
@@ -139,54 +130,19 @@ print_out()
fi
}
-# converts a relative path to an absolute path
-rel_path()
-{
- declare -n ret=${1}
- source=$(pwd)
- target=${2}
- common_part=$source
-# echo "${target#$common_part}"
- back=
- while [ "${target#$common_part}" == "${target}" ]; do
- common_part=$(dirname "$common_part")
- back="../${back}"
- # echo ${back}
- # echo ${common_part}
- # if [ back == common_part ]; then
- # break
- # fi
- done
-
-# echo ${back}${target#$common_part/}
-
-if [ -z "${back=}" ]; then
- ret=./${target#$common_part/}
- else
- ret=${back}${target#$common_part/}
- fi
-}
-
-# converts an absolute to a relative path
-abs_path()
-{
- OLD=$(pwd)
- cd "$1"
- ABS=$(pwd)
- cd "$OLD"
- echo "$ABS"/
-}
-
-# above be helper functions
-#-------------------------------------------------------------
-
-
# Peel off first and/or second argument so that $@ contains arguments to DML script
if echo "$1" | grep -q "jar"; then
SYSTEMDS_JAR_FILE=$1
shift
- SCRIPT_FILE=$1
- shift
+ # handle optional '-f' before DML file (for consistency)
+ if echo "$1" | grep -q "\-f"; then
+ shift
+ SCRIPT_FILE=$1
+ shift
+ else
+ SCRIPT_FILE=$1
+ shift
+ fi
elif echo "$1" | grep -q "WORKER"; then
WORKER=1
shift
@@ -201,31 +157,39 @@ elif echo "$1" | grep -q "WORKER"; then
printUsageExit
fi
else
- SCRIPT_FILE=$1
- shift
+ # handle optional '-f' before DML file (for consistency)
+ if echo "$1" | grep -q "\-f"; then
+ shift
+ SCRIPT_FILE=$1
+ shift
+ else
+ SCRIPT_FILE=$1
+ shift
+ fi
fi
if [ -z "$WORKER" ] ; then
WORKER=0
fi
-
if [ -z "$SYSTEMDS_ROOT" ] ; then
SYSTEMDS_ROOT=.
else
# construct a relative path
- rel_path REL "$(abs_path ${SYSTEMDS_ROOT})"
- SYSTEMDS_ROOT=${REL}
+ SYSTEMDS_ROOT=$(realpath --relative-to=. ${SYSTEMDS_ROOT})
echo "Using existing SystemDS at ${SYSTEMDS_ROOT}"
fi;
+# when using find, look in the directories in this order
+DIR_SEARCH_ORDER="conf lib $SYSTEMDS_ROOT/conf $SYSTEMDS_ROOT/target"
+
# find me a SystemDS jar file to run
if [ -z "$SYSTEMDS_JAR_FILE" ];then
- SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT" -iname "systemds.jar" | tail -n 1)
+ SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds.jar" 2> /dev/null | tail -n 1)
if [ -z "$SYSTEMDS_JAR_FILE" ];then
- SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT" -iname "systemds-?.?.?.jar" | tail -n 1)
+ SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?.jar" 2> /dev/null | head -n 1)
if [ -z "$SYSTEMDS_JAR_FILE" ];then
- SYSTEMDS_JAR_FILE=$(find "$SYSTEMDS_ROOT" -iname "systemds-?.?.?-SNAPSHOT.jar" | tail -n 1)
+ SYSTEMDS_JAR_FILE=$(find $DIR_SEARCH_ORDER -iname "systemds-?.?.?-SNAPSHOT.jar" 2> /dev/null | head -n 1)
fi
fi
else
@@ -234,23 +198,56 @@ fi
# check if log4j config file exists, otherwise unset
# to run with a non fatal complaint by SystemDS
-LOG4JPROP=$(find "$SYSTEMDS_ROOT" -iname "log4j*properties" | tail -n 1)
-if [ -z "${LOG4JPROP}" ]; then
- LOG4JPROP=""
+if [ -z "$LOG4JPROP" ] ; then
+ LOG4JPROP=$(find $DIR_SEARCH_ORDER -iname "log4j*properties" 2> /dev/null | head -n 1)
+ if [ -z "${LOG4JPROP}" ]; then
+ LOG4JPROP=""
+ else
+ LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP"
+ fi
+else
+ # L4J was set by env var. Unset if that setting is wrong
+ LOG4JPROP2=$(find "$LOG4JPROP")
+ if [ -z "${LOG4JPROP2}" ]; then
+ LOG4JPROP=""
+ else
+ LOG4JPROP="-Dlog4j.configuration=file:$LOG4JPROP2"
+ fi
fi
-# same as above: set config file param if the file exists
-CONFIG_FILE=$(find "$SYSTEMDS_ROOT" -iname "SystemDS*config*.xml" | tail -n 1)
-if [ -z "$CONFIG_FILE" ]; then
- CONFIG_FILE=""
+if [ -z "$CONFIG_FILE" ] ; then
+ # same as above: set config file param if the file exists
+ CONFIG_FILE=$(find $DIR_SEARCH_ORDER -iname "SystemDS*config*.xml" 2> /dev/null | head -n 1)
+ if [ -z "$CONFIG_FILE" ]; then
+ CONFIG_FILE=""
+ else
+ CONFIG_FILE="--config $CONFIG_FILE"
+ fi
else
- CONFIG_FILE="--config $CONFIG_FILE"
+ # CONFIG_FILE was set by env var. Unset if that setting is wrong
+ CONFIG_FILE2=$(find "$CONFIG_FILE")
+ if [ -z "${CONFIG_FILE2}" ]; then
+ CONFIG_FILE=""
+ else
+ CONFIG_FILE="--config $CONFIG_FILE"
+ fi
+fi
+
+# find absolute path to hadoop home in SYSTEMDS_ROOT
+if [ -z "$HADOOP_HOME" ]; then
+ HADOOP_HOME=$(realpath "$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1 )")
+ export HADOOP_HOME
fi
+# add hadoop home to path and lib path for loading hadoop jni
+HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")
+# default directory separator unix style
DIR_SEP=/
# detect operating system to set correct path separator
-if [ "$OSTYPE" == "win32" ] || [ "$OSTYPE" == "msys" ] ; then
+if [ "$OSTYPE" == "win32" ] || [ "$OSTYPE" == "msys" ] || [ "$OSTYPE" == "cygwin" ]; then
PATH_SEP=\;
+ DIR_SEP=\\
+ HADOOP_REL="${HADOOP_REL////\\}"
else
PATH_SEP=:
fi
@@ -259,19 +256,11 @@ fi
JARNAME=$(basename "$SYSTEMDS_JAR_FILE")
# relative path to jar file
-rel_path R "$(abs_path "$(dirname "$SYSTEMDS_JAR_FILE")")"
-SYSTEMDS_JAR_FILE="${R}${DIR_SEP}${JARNAME}"
+SYSTEMDS_JAR_FILE=$(realpath --relative-to=. "$(dirname "$SYSTEMDS_JAR_FILE")")${DIR_SEP}${JARNAME}
-# find hadoop home
-if [ -z "$HADOOP_HOME" ]; then
- HADOOP_HOME=$(abs_path "$(find "$SYSTEMDS_ROOT" -iname hadoop | tail -n 1 )")
- export HADOOP_HOME
-fi
-
-# add hadoop home to path and lib path for loading hadoop jni
-rel_path HADOOP_REL "$HADOOP_HOME"
-export PATH=${PATH}${PATH_SEP}${HADOOP_REL}bin
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}${PATH_SEP}${HADOOP_REL}bin
+NATIVE_LIBS="$SYSTEMDS_ROOT${DIR_SEP}target${DIR_SEP}classes${DIR_SEP}lib"
+export PATH=${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${PATH}${PATH_SEP}$NATIVE_LIBS
+export LD_LIBRARY_PATH=${HADOOP_REL}${DIR_SEP}bin${PATH_SEP}${LD_LIBRARY_PATH}
# set java class path
CLASSPATH="${SYSTEMDS_JAR_FILE}${PATH_SEP} \
@@ -311,7 +300,7 @@ elif [ $SYSDS_DISTRIBUTED == 0 ]; then
CMD=" \
java $SYSTEMDS_STANDALONE_OPTS \
-cp $CLASSPATH \
- -Dlog4j.configuration=file:$LOG4JPROP \
+ $LOG4JPROP \
org.apache.sysds.api.DMLScript \
-f $SCRIPT_FILE \
-exec singlenode \
diff --git a/dev/release/simple-release-verify.sh b/dev/release/simple-release-verify.sh
index 98d2095..7a09dc0 100755
--- a/dev/release/simple-release-verify.sh
+++ b/dev/release/simple-release-verify.sh
@@ -198,7 +198,7 @@ if [[ "$BIN_VERIFY" == "true" ]]; then
tar -xzf $DIST_DIR/systemds-$TAG-bin.tgz
cd systemds-$TAG-bin
echo "print('hello world');" > hello.dml
- ./systemds.sh hello.dml
+ ./systemds hello.dml
cd ..
echo "`date +%Y-%m-%dT%H:%M:%S`: INFO: Verification of tgz files completed successfully."
@@ -208,7 +208,7 @@ if [[ "$BIN_VERIFY" == "true" ]]; then
unzip -q $DIST_DIR/systemds-$TAG-bin.zip
cd systemds-$TAG-bin
echo "print('hello world');" > hello.dml
- ./systemds.sh hello.dml
+ ./systemds hello.dml
cd ..
echo "`date +%Y-%m-%dT%H:%M:%S`: INFO: Verification of zip files completed successfully."
diff --git a/docker/sysds.Dockerfile b/docker/sysds.Dockerfile
index ce8cd4f..01a0094 100644
--- a/docker/sysds.Dockerfile
+++ b/docker/sysds.Dockerfile
@@ -57,4 +57,4 @@ RUN rm -r src/ && \
COPY docker/mountFolder/main.dml /input/main.dml
-CMD ["systemds.sh", "/input/main.dml"]
+CMD ["systemds", "/input/main.dml"]
diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 9fa9a6f..ed7e3e1 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -34,6 +34,7 @@ SYSTEMDS-30 Builtin and Packaging
* 35 Replace unnecessary dependencies w/ custom
* 36 Shell script for AWS execution OK
* 37 Cleanup mvn package, compile, test OK
+ * 38 Runscript rename, improve, fix cygwin OK
SYSTEMDS-40 Preprocessing builtins
* 41 Add new winsorize builtin function OK
diff --git a/src/assembly/bin/README.md b/src/assembly/bin/README.md
index a0688fe..4fff234 100644
--- a/src/assembly/bin/README.md
+++ b/src/assembly/bin/README.md
@@ -53,7 +53,7 @@ of sparsity. As you can see, DML can access these parameters by specifying $1, $
#### Running a script locally
```shell script
-$ ./systemds.sh hello.dml -args 10 10 1.0
+$ ./systemds hello.dml -args 10 10 1.0
```
#### Running a script locally, providing your own SystemDS.jar file
@@ -61,14 +61,14 @@ $ ./systemds.sh hello.dml -args 10 10 1.0
If you compiled SystemDS from source, you can of course use the created JAR
file with the run script.
```shell script
-$ ./systemds.sh path/to/the/SystemDS.jar hello.dml -args 10 10 1.0
+$ ./systemds path/to/the/SystemDS.jar hello.dml -args 10 10 1.0
```
#### Running a script locally, in your SystemDS source environment
If you have cloned the SystemDS source repository and want to run your DML
script with that, you can point the
shell script to the source directory by setting the `SYSTEMDS_ROOT`
environment variable.
```shell script
-$ SYSTEMDS_ROOT=../../code/my-systemds/source ./systemds.sh hello.dml -args 10 10 1.0
+$ SYSTEMDS_ROOT=../../code/my-systemds/source ./systemds hello.dml -args 10 10 1.0
```
#### Running a script distributed on a Spark cluster
@@ -78,7 +78,7 @@ To force cluster mode in this little test, we will increase the matrix size to g
something to do and force SystemDS to only generate Spark instructions by
adding -exec spark to the command line
parameters:
```shell script
-$ SYSDS_DISTRIBUTED=1 ./systemds.sh hello.dml -args 10000 10000 1.0 -exec spark
+$ SYSDS_DISTRIBUTED=1 ./systemds hello.dml -args 10000 10000 1.0 -exec spark
```
The output should read something similar to this (the warning can be safely
ignored):
diff --git a/src/main/python/docs/source/federated.rst b/src/main/python/docs/source/federated.rst
index b2de7bb..cd63e4d 100644
--- a/src/main/python/docs/source/federated.rst
+++ b/src/main/python/docs/source/federated.rst
@@ -37,7 +37,7 @@ A simple guide to do this is in the SystemDS Repository_.
If that is setup correctly simply start a worker using the following command.
Here the ``8001`` refer to the port used by the worker.
- systemds.sh WORKER 8001
+ systemds WORKER 8001
Simple Aggregation Example
--------------------------
@@ -88,9 +88,9 @@ Using the data created from the last example we can simulate
multiple federated workers by starting multiple ones on different ports.
I recommend to start 3 different terminals, and run one federated environment
in each.
-| systemds.sh WORKER 8001
-| systemds.sh WORKER 8002
-| systemds.sh WORKER 8003
+| systemds WORKER 8001
+| systemds WORKER 8002
+| systemds WORKER 8003
Once all three workers are up and running we can leverage all three in the
following example::
diff --git a/src/main/python/tests/federated/runFedTest.sh b/src/main/python/tests/federated/runFedTest.sh
index 33f82b5..f6fec6e 100755
--- a/src/main/python/tests/federated/runFedTest.sh
+++ b/src/main/python/tests/federated/runFedTest.sh
@@ -37,9 +37,9 @@ log="$outputdir/out.log"
# Make the workers start quietly and pipe their output to a file to print later
export SYSDS_QUIET=1
-systemds.sh WORKER 8001 >$w1_Output 2>&1 &
+systemds WORKER 8001 >$w1_Output 2>&1 &
Fed1=$!
-systemds.sh WORKER 8002 >$w2_Output 2>&1 &
+systemds WORKER 8002 >$w2_Output 2>&1 &
Fed2=$!
echo "Starting workers" && sleep 3 && echo "Starting tests"
diff --git a/src/main/python/tests/lineage/test_lineagetrace.py b/src/main/python/tests/lineage/test_lineagetrace.py
index e462c48..5ee886f 100644
--- a/src/main/python/tests/lineage/test_lineagetrace.py
+++ b/src/main/python/tests/lineage/test_lineagetrace.py
@@ -79,7 +79,7 @@ def create_execute_and_trace_dml(script: str, name: str):
# Call SYSDS!
result_file_name = temp_dir + "/" + name + ".txt"
- os.system("systemds.sh " + script_file_name + " > " + result_file_name)
+ os.system("systemds " + script_file_name + " > " + result_file_name)
return parse_trace(result_file_name)
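As an illustrative aside, not part of the commit itself: the removed rel_path/abs_path
helpers are replaced by GNU coreutils realpath together with a bash pattern substitution
for the Windows directory separator. A minimal sketch of the two techniques (the hadoop
path below is hypothetical):
```shell script
# Sketch only; assumes GNU coreutils realpath is available on PATH.
HADOOP_HOME=/opt/systemds/target/lib/hadoop   # hypothetical install location

# derive a path relative to the current working directory
# (this replaces the removed rel_path/abs_path helper functions)
HADOOP_REL=$(realpath --relative-to=. "$HADOOP_HOME")

# on win32/msys/cygwin, convert unix-style '/' to '\' via ${var//pattern/replacement}
HADOOP_REL="${HADOOP_REL////\\}"
echo "$HADOOP_REL"
```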