Use Apache Commons CLI to parse command line arguments in DMLScript
- Uses Apache Commons CLI 1.2 to parse command line options (see the sketch below)
- Known limitation: leading and trailing double quotes are stripped from arguments
- Changed scripts to accept "-config " instead of "-config="
- Instead of "-gpu force=true", accepts "-gpu force"
- Prints a concise description of usage options
- Updated bin/systemml script to print usage options when passed the
"-help" option
- Removed DMLScriptTest{1,2}, which tested the previous hand-rolled command line
  parsing; many of their test cases were re-added as unit tests
- Added unit tests
Closes #435
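
For readers unfamiliar with Commons CLI, here is a minimal, hypothetical sketch of the kind of option wiring described above. The class name, descriptions, and exact builder calls are illustrative only and are not the actual DMLScript.java code; it simply shows how the space-separated "-config <file>" form and the optional-argument "-gpu [force]" form parse with Commons CLI 1.2.

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

public class CommonsCliSketch {

    @SuppressWarnings("static-access")
    public static void main(String[] args) throws ParseException {
        Options options = new Options();

        // -f <filename> : DML script to execute
        options.addOption(OptionBuilder.withArgName("filename").hasArg()
                .withDescription("DML script file to execute").create("f"));
        // -config <config_filename> : space-separated now, no '=' needed
        options.addOption(OptionBuilder.withArgName("config_filename").hasArg()
                .withDescription("optional SystemML-config.xml file").create("config"));
        // -gpu [force] : optional value, so both "-gpu" and "-gpu force" are accepted
        options.addOption(OptionBuilder.withArgName("force").hasOptionalArg()
                .withDescription("use GPU if available ('force' to require it)").create("gpu"));
        // -nvargs name=value ... : unlimited named arguments for the DML script
        options.addOption(OptionBuilder.hasArgs()
                .withDescription("named arguments passed to the DML script").create("nvargs"));
        // -help : print the usage description
        options.addOption("help", false, "print usage message and exit");

        CommandLineParser parser = new GnuParser();
        CommandLine line = parser.parse(options, args);

        if (line.hasOption("help")) {
            new HelpFormatter().printHelp("systemml", options);
            return;
        }

        String script    = line.getOptionValue("f");
        String config    = line.getOptionValue("config");           // null if not given
        boolean gpu      = line.hasOption("gpu");
        boolean forceGPU = "force".equals(line.getOptionValue("gpu"));
        String[] nvargs  = line.getOptionValues("nvargs");          // e.g. {"X=X.mtx", "k=5"}

        System.out.println("script=" + script + " config=" + config
                + " gpu=" + gpu + " forceGPU=" + forceGPU
                + " nvargs=" + (nvargs == null ? 0 : nvargs.length));
    }
}

With options declared this way, "-config SystemML-config.xml", "-gpu force" and "-nvargs X=X.mtx k=5" all land in one CommandLine object, which is what lets the scripts and docs below drop the old "-config=" and "-gpu force=true" spellings.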
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/346d1c01
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/346d1c01
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/346d1c01
Branch: refs/heads/master
Commit: 346d1c01ad94c5b8178b8c9baf7d38e0867805da
Parents: ee6bc8c
Author: Nakul Jindal <[email protected]>
Authored: Mon Mar 27 13:35:12 2017 -0700
Committer: Nakul Jindal <[email protected]>
Committed: Mon Mar 27 13:35:12 2017 -0700
----------------------------------------------------------------------
bin/systemml | 84 ++-
bin/systemml.bat | 4 +-
docs/algorithms-classification.md | 44 +-
docs/algorithms-clustering.md | 14 +-
docs/algorithms-descriptive-statistics.md | 14 +-
docs/algorithms-matrix-factorization.md | 18 +-
docs/algorithms-regression.md | 36 +-
docs/algorithms-survival-analysis.md | 16 +-
docs/hadoop-batch-mode.md | 16 +-
docs/spark-batch-mode.md | 4 +-
docs/standalone-guide.md | 2 +-
docs/troubleshooting-guide.md | 2 +-
scripts/sparkDML.sh | 2 +-
.../java/org/apache/sysml/api/DMLScript.java | 613 ++++++++++++-------
.../java/org/apache/sysml/api/MLContext.java | 66 +-
.../java/org/apache/sysml/api/ScriptType.java | 65 ++
.../org/apache/sysml/api/jmlc/Connection.java | 22 +-
.../sysml/api/mlcontext/MLContextUtil.java | 1 +
.../org/apache/sysml/api/mlcontext/Script.java | 1 +
.../sysml/api/mlcontext/ScriptExecutor.java | 10 +-
.../sysml/api/mlcontext/ScriptFactory.java | 1 +
.../apache/sysml/api/mlcontext/ScriptType.java | 65 --
.../org/apache/sysml/parser/AParserWrapper.java | 28 +-
.../runtime/instructions/cp/BooleanObject.java | 2 +-
.../org/apache/sysml/yarn/DMLYarnClient.java | 19 +-
src/main/resources/scripts/sparkDML.sh | 2 +-
src/main/standalone/runStandaloneSystemML.bat | 4 +-
src/main/standalone/runStandaloneSystemML.sh | 2 +-
.../test/integration/AutomatedTestBase.java | 3 +-
.../functions/dmlscript/DMLScriptTest1.java | 125 ----
.../functions/dmlscript/DMLScriptTest2.java | 151 -----
.../functions/misc/DataTypeChangeTest.java | 27 +-
.../parfor/ParForDependencyAnalysisTest.java | 15 +-
.../TransformFrameEncodeDecodeTest.java | 11 +-
.../integration/mlcontext/MLContextTest.java | 12 +-
.../sysml/test/unit/CLIOptionsParserTest.java | 419 +++++++++++++
.../functions/dmlscript/ZPackageSuite.java | 37 --
37 files changed, 1121 insertions(+), 836 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml
----------------------------------------------------------------------
diff --git a/bin/systemml b/bin/systemml
index 0ccee2d..44ab45e 100755
--- a/bin/systemml
+++ b/bin/systemml
@@ -20,32 +20,22 @@
#
#-------------------------------------------------------------
+
# error help print
-printUsageExit()
+printSimpleUsage()
{
cat << EOF
Usage: $0 <dml-filename> [arguments] [-help]
- -help - Print this usage message and exit
+ -help - Print detailed help message
EOF
exit 1
}
-# Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]'
-while getopts "h:" options; do
- case $options in
- h ) echo Warning: Help requested. Will exit after usage message
- printUsageExit
- ;;
- \? ) echo Warning: Help requested. Will exit after usage message
- printUsageExit
- ;;
- * ) echo Error: Unexpected error while processing options
- esac
-done
+# Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemML.jar -f <dml-filename> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]'
if [ -z "$1" ] ; then
echo "Wrong Usage.";
- printUsageExit;
+ printSimpleUsage
fi
@@ -98,24 +88,6 @@ then
fi
-# Peel off first argument so that $@ contains arguments to DML script
-SCRIPT_FILE=$1
-shift
-
-# if the script file path was omitted, try to complete the script path
-if [ ! -f "$SCRIPT_FILE" ]
-then
- SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE)
- SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME")
- if [ ! "$SCRIPT_FILE_FOUND" ]
- then
- echo "Could not find DML script: $SCRIPT_FILE"
- printUsageExit;
- else
- SCRIPT_FILE=$SCRIPT_FILE_FOUND
- echo "DML script: $SCRIPT_FILE"
- fi
-fi
# add hadoop libraries which were generated by the build to the classpath
@@ -149,13 +121,57 @@ if [ -f "${PROJECT_ROOT_DIR}/conf/systemml-env.sh" ]; then
fi
fi
+
+printUsageExit()
+{
+CMD="\
+java ${SYSTEMML_DEFAULT_JAVA_OPTS} \
+org.apache.sysml.api.DMLScript \
+-help"
+# echo ${CMD}
+eval ${CMD}
+exit 0
+}
+
+while getopts "h:" options; do
+ case $options in
+ h ) echo Warning: Help requested. Will exit after usage message
+ printUsageExit
+ ;;
+ \? ) echo Warning: Help requested. Will exit after usage message
+ printUsageExit
+ ;;
+ * ) echo Error: Unexpected error while processing options
+ esac
+done
+
+# Peel off first argument so that $@ contains arguments to DML script
+SCRIPT_FILE=$1
+shift
+
+# if the script file path was omitted, try to complete the script path
+if [ ! -f "$SCRIPT_FILE" ]
+then
+ SCRIPT_FILE_NAME=$(basename $SCRIPT_FILE)
+ SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME")
+ if [ ! "$SCRIPT_FILE_FOUND" ]
+ then
+ echo "Could not find DML script: $SCRIPT_FILE"
+ printSimpleUsage
+ else
+ SCRIPT_FILE=$SCRIPT_FILE_FOUND
+ echo "DML script: $SCRIPT_FILE"
+ fi
+fi
+
+
# Invoke the jar with options and arguments
CMD="\
java ${SYSTEMML_DEFAULT_JAVA_OPTS} \
org.apache.sysml.api.DMLScript \
-f '$SCRIPT_FILE' \
-exec singlenode \
--config='$PROJECT_ROOT_DIR/conf/SystemML-config.xml' \
+-config '$PROJECT_ROOT_DIR/conf/SystemML-config.xml' \
$@"
eval ${CMD}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/bin/systemml.bat
----------------------------------------------------------------------
diff --git a/bin/systemml.bat b/bin/systemml.bat
index 3fc86a8..e16a2a1 100755
--- a/bin/systemml.bat
+++ b/bin/systemml.bat
@@ -117,7 +117,7 @@ set CMD=java -Xmx4g -Xms2g -Xmn400m ^
org.apache.sysml.api.DMLScript ^
-f %SCRIPT_FILE% ^
-exec singlenode ^
- -config="%PROJECT_ROOT_DIR%\conf\SystemML-config.xml" ^
+ -config "%PROJECT_ROOT_DIR%\conf\SystemML-config.xml" ^
%DML_OPT_ARGS%
:: execute the java command
@@ -141,7 +141,7 @@ GOTO Msg
:Msg
ECHO Usage: runStandaloneSystemML.bat ^<dml-filename^> [arguments] [-help]
-ECHO Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f ^<dml-filename^> -exec singlenode -config=SystemML-config.xml [Optional-Arguments]'
+ECHO Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m -jar jSystemML.jar -f ^<dml-filename^> -exec singlenode -config SystemML-config.xml [Optional-Arguments]'
GOTO ExitErr
:ExitErr
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-classification.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-classification.md b/docs/algorithms-classification.md
index 0ee43bf..11bd1da 100644
--- a/docs/algorithms-classification.md
+++ b/docs/algorithms-classification.md
@@ -165,7 +165,7 @@ val prediction = model.transform(X_test_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -336,7 +336,7 @@ prediction.show()
--conf spark.akka.frameSize=128
SystemML.jar
-f MultiLogReg.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -532,7 +532,7 @@ val model = svm.fit(X_train_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -579,7 +579,7 @@ val prediction = model.transform(X_test_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -661,7 +661,7 @@ using a held-out test set. Note that this is an optional argument.
--conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -695,7 +695,7 @@ using a held-out test set. Note that this is an optional argument.
--conf spark.akka.frameSize=128
SystemML.jar
-f l2-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -800,7 +800,7 @@ val model = svm.fit(X_train_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f m-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -847,7 +847,7 @@ val prediction = model.transform(X_test_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f m-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -1014,7 +1014,7 @@ prediction.show()
--conf spark.akka.frameSize=128
SystemML.jar
-f m-svm.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -1048,7 +1048,7 @@ prediction.show()
--conf spark.akka.frameSize=128
SystemML.jar
-f m-svm-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -1153,7 +1153,7 @@ val model = nb.fit(X_train_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -1198,7 +1198,7 @@ val prediction = model.transform(X_test_df)
--conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -1289,7 +1289,7 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted')
--conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -1321,7 +1321,7 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted')
--conf spark.akka.frameSize=128
SystemML.jar
-f naive-bayes-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/y.mtx
@@ -1420,7 +1420,7 @@ implementation is well-suited to handle large-scale data and builds a
--conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -1458,7 +1458,7 @@ implementation is well-suited to handle large-scale data and builds a
--conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -1558,7 +1558,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -1593,7 +1593,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f decision-tree-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -1828,7 +1828,7 @@ for classification in parallel.
--conf spark.akka.frameSize=128
SystemML.jar
-f random-forest.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -1871,7 +1871,7 @@ for classification in parallel.
--conf spark.akka.frameSize=128
SystemML.jar
-f random-forest-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -1994,7 +1994,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f random-forest.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -2032,7 +2032,7 @@ To compute predictions:
--conf spark.akka.frameSize=128
SystemML.jar
-f random-forest-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-clustering.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-clustering.md b/docs/algorithms-clustering.md
index 023a7f5..0c91fa1 100644
--- a/docs/algorithms-clustering.md
+++ b/docs/algorithms-clustering.md
@@ -134,7 +134,7 @@ apart is a "false negative" etc.
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
C=[file]
@@ -168,7 +168,7 @@ apart is a "false negative" etc.
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=[file]
C=[file]
@@ -260,7 +260,7 @@ standard output
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
k=5
@@ -289,7 +289,7 @@ standard output
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
k=5
@@ -322,7 +322,7 @@ To predict Y given X and C:
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
C=/user/ml/C.mtx
@@ -348,7 +348,7 @@ given X and C:
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
C=/user/ml/C.mtx
@@ -373,7 +373,7 @@ labels prY:
--conf spark.akka.frameSize=128
SystemML.jar
-f Kmeans-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs spY=/user/ml/Y.mtx
prY=/user/ml/PredY.mtx
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-descriptive-statistics.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-descriptive-statistics.md b/docs/algorithms-descriptive-statistics.md
index 1ddf01a..f45ffae 100644
--- a/docs/algorithms-descriptive-statistics.md
+++ b/docs/algorithms-descriptive-statistics.md
@@ -130,7 +130,7 @@ to compute the mean of a categorical attribute like “Hair Color”.
--conf spark.akka.frameSize=128
SystemML.jar
-f Univar-Stats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
TYPES=<file>
@@ -169,7 +169,7 @@ be stored. The format of the output matrix is defined by
--conf spark.akka.frameSize=128
SystemML.jar
-f Univar-Stats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
TYPES=/user/ml/types.mtx
@@ -590,7 +590,7 @@ attributes like “Hair Color”.
--conf spark.akka.frameSize=128
SystemML.jar
-f bivar-stats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
index1=<file>
@@ -659,7 +659,7 @@ are defined in [**Table 2**](algorithms-descriptive-statistics.html#table2).
--conf spark.akka.frameSize=128
SystemML.jar
-f bivar-stats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
index1=/user/ml/S1.mtx
@@ -1152,7 +1152,7 @@ becomes reversed and amplified (from $+0.1$ to $-0.5$) if we ignore the months.
--conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Xcid=[file]
@@ -1360,7 +1360,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Xcid=/user/ml/Xcid.mtx
@@ -1388,7 +1388,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f stratstats.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/Data.mtx
Xcid=/user/ml/Xcid.mtx
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-matrix-factorization.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-matrix-factorization.md b/docs/algorithms-matrix-factorization.md
index 51eb614..9af8c19 100644
--- a/docs/algorithms-matrix-factorization.md
+++ b/docs/algorithms-matrix-factorization.md
@@ -61,7 +61,7 @@ top-$K$ (for a given value of $K$) principal components.
--conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs INPUT=<file>
K=<int>
@@ -124,7 +124,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs INPUT=/user/ml/input.mtx
K=10
@@ -154,7 +154,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f PCA.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs INPUT=/user/ml/test_input.mtx
K=10
@@ -262,7 +262,7 @@ problems.
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs V=<file>
L=<file>
@@ -296,7 +296,7 @@ problems.
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS_predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -327,7 +327,7 @@ problems.
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS_topk_predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -436,7 +436,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs V=/user/ml/V
L=/user/ml/L
@@ -472,7 +472,7 @@ To compute predicted ratings for a given list of users and items:
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS_predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X
Y=/user/ml/Y
@@ -506,7 +506,7 @@ predicted ratings for a given list of users:
--conf spark.akka.frameSize=128
SystemML.jar
-f ALS_topk_predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X
Y=/user/ml/Y
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-regression.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-regression.md b/docs/algorithms-regression.md
index 80b38a3..284063a 100644
--- a/docs/algorithms-regression.md
+++ b/docs/algorithms-regression.md
@@ -107,7 +107,7 @@ y_test = lr.fit(df_train)
--conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegDS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -152,7 +152,7 @@ y_test = lr.fit(df_train)
--conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegCG.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -258,7 +258,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -
--conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegDS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -316,7 +316,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) -
--conf spark.akka.frameSize=128
SystemML.jar
-f LinearRegCG.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -557,7 +557,7 @@ lowest AIC is computed.
--conf spark.akka.frameSize=128
SystemML.jar
-f StepLinearRegDS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -628,7 +628,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f StepLinearRegDS.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -760,7 +760,7 @@ distributions and link functions, see below for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -898,7 +898,7 @@ if no maximum limit provided
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -1235,7 +1235,7 @@ distribution family is supported (see below for details).
--conf spark.akka.frameSize=128
SystemML.jar
-f StepGLM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=<file>
@@ -1340,7 +1340,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f StepGLM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
Y=/user/ml/Y.mtx
@@ -1486,7 +1486,7 @@ this step outside the scope of `GLM-predict.dml` for now.
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
Y=[file]
@@ -1625,7 +1625,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=1
vpow=0.0
@@ -1661,7 +1661,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=1
vpow=0.0
@@ -1695,7 +1695,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=2
link=2
@@ -1730,7 +1730,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=2
link=3
@@ -1763,7 +1763,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=3
X=/user/ml/X.mtx
@@ -1798,7 +1798,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=1
vpow=1.0
@@ -1837,7 +1837,7 @@ unknown (which sets it to `1.0`).
--conf spark.akka.frameSize=128
SystemML.jar
-f GLM-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs dfam=1
vpow=2.0
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/algorithms-survival-analysis.md
----------------------------------------------------------------------
diff --git a/docs/algorithms-survival-analysis.md b/docs/algorithms-survival-analysis.md
index a5e641e..239ab08 100644
--- a/docs/algorithms-survival-analysis.md
+++ b/docs/algorithms-survival-analysis.md
@@ -62,7 +62,7 @@ censored and uncensored survival times.
--conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
TE=<file>
@@ -157,7 +157,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
TE=/user/ml/TE
@@ -194,7 +194,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f KM.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
TE=/user/ml/TE
@@ -466,7 +466,7 @@ may be categorical (ordinal or nominal) as well as continuous-valued.
--conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
TE=<file>
@@ -508,7 +508,7 @@ may be categorical (ordinal or nominal) as well as continuous-valued.
--conf spark.akka.frameSize=128
SystemML.jar
-f Cox-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=<file>
RT=<file>
@@ -617,7 +617,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
TE=/user/ml/TE
@@ -656,7 +656,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f Cox.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X.mtx
TE=/user/ml/TE
@@ -696,7 +696,7 @@ SystemML Language Reference for details.
--conf spark.akka.frameSize=128
SystemML.jar
-f Cox-predict.dml
- -config=SystemML-config.xml
+ -config SystemML-config.xml
-exec hybrid_spark
-nvargs X=/user/ml/X-sorted.mtx
RT=/user/ml/recoded-timestamps.csv
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/hadoop-batch-mode.md
----------------------------------------------------------------------
diff --git a/docs/hadoop-batch-mode.md b/docs/hadoop-batch-mode.md
index ddc1c1f..3af7c0c 100644
--- a/docs/hadoop-batch-mode.md
+++ b/docs/hadoop-batch-mode.md
@@ -49,11 +49,11 @@ refer to the Hadoop documentation.
SystemML can be invoked in Hadoop Batch mode using the following syntax:
- hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>)
+ hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>)
The `SystemML.jar` file is specified to Hadoop using the `jar` option.
The DML script to invoke is specified after the `-f` argument. Configuration
settings can be passed to SystemML
-using the optional `-config=` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional
+using the optional `-config ` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional
arguments (`-args`). Named arguments are preferred over positional arguments.
Positional arguments are considered
to be deprecated. All the primary algorithm scripts included with SystemML use
named arguments.
@@ -69,11 +69,11 @@ to be deprecated. All the primary algorithm scripts included with SystemML use n
In a clustered environment, it is *highly* recommended that SystemML
configuration settings are specified
in a `SystemML-config.xml` file. By default, SystemML will look for this file
in the current working
-directory (`./SystemML-config.xml`). This location can be overridden by the `-config=` argument.
+directory (`./SystemML-config.xml`). This location can be overridden by the `-config ` argument.
**Example #3: DML Invocation with Configuration File Explicitly Specified and
Named Arguments**
- hadoop jar systemml/SystemML.jar -f systemml/algorithms/Kmeans.dml -config=/conf/SystemML-config.xml -nvargs X=X.mtx k=5
+ hadoop jar systemml/SystemML.jar -f systemml/algorithms/Kmeans.dml -config /conf/SystemML-config.xml -nvargs X=X.mtx k=5
For recommended SystemML configuration settings in a clustered environment,
please see
[Recommended Hadoop Cluster Configuration
Settings](hadoop-batch-mode.html#recommended-hadoop-cluster-configuration-settings).
@@ -170,7 +170,7 @@ arguments to the DML script were specified following the `-nvargs` option.
In the console output, we see a warning that no default SystemML config file
was found in the current working directory.
In a distributed environment on a large data set, it is highly advisable to
specify configuration settings in a SystemML config file for
-optimal performance. The location of the SystemML config file can be explicitly specified using the `-config=` argument.
+optimal performance. The location of the SystemML config file can be explicitly specified using the `-config ` argument.
The OptimizerUtils warning occurs because parallel multi-threaded text reads
in Java versions less than 1.8 result
in thread contention issues, so only a single thread reads matrix data in text
formats.
@@ -859,7 +859,7 @@ A description of the named arguments that can be passed in to this script can be
`genRandData4Kmeans.dml` file. For data, I'll generate a matrix `X.mtx`
consisting of 1 million rows and 100 features. I'll explicitly reference my
`SystemML-config.xml` file, since I'm
executing SystemML in Hadoop from my home directory rather than from the
SystemML project root directory.
- [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f genRandData4Kmeans.dml -config=systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs nr=1000000 nf=100 nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx
+ [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f genRandData4Kmeans.dml -config systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs nr=1000000 nf=100 nc=10 dc=10.0 dr=1.0 fbf=100.0 cbf=100.0 X=X.mtx C=C.mtx Y=Y.mtx YbyC=YbyC.mtx
After the data generation has finished, I'll check HDFS for the amount of
space used. The 1M-row matrix `X.mtx`
requires about 2.8GB of space.
@@ -895,7 +895,7 @@ Here we can see the `X.mtx` data files.
Next, I'll run the `Kmeans.dml` algorithm on the 1M-row matrix `X.mtx`.
- [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans.dml -config=/systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx k=5 C=Centroids.mtx
+ [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans.dml -config /systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx k=5 C=Centroids.mtx
We can see the `Centroids.mtx` data file has been written to HDFS.
@@ -916,7 +916,7 @@ We can see the `Centroids.mtx` data file has been written to HDFS.
Now that we have trained our model, next we will test our model. We can do
this with
the `Kmeans-predict.dml` script.
- [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans-predict.dml -config=systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx C=Centroids.mtx prY=PredY.mtx O=stats.txt
+ [hadoop@host1 ~]$ hadoop jar systemml-{{site.SYSTEMML_VERSION}}/SystemML.jar -f systemml-{{site.SYSTEMML_VERSION}}/algorithms/Kmeans-predict.dml -config systemml-{{site.SYSTEMML_VERSION}}/SystemML-config.xml -nvargs X=X.mtx C=Centroids.mtx prY=PredY.mtx O=stats.txt
In the file system, we can see that the `PredY.mtx` matrix was created.
The `stats.txt` file lists statistics about the results.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/spark-batch-mode.md
----------------------------------------------------------------------
diff --git a/docs/spark-batch-mode.md b/docs/spark-batch-mode.md
index c199b1f..39bcd3e 100644
--- a/docs/spark-batch-mode.md
+++ b/docs/spark-batch-mode.md
@@ -43,10 +43,10 @@ mode in more depth.
SystemML can be invoked in Hadoop Batch mode using the following syntax:
- spark-submit SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>)
+ spark-submit SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>)
The DML script to invoke is specified after the `-f` argument. Configuration
settings can be passed to SystemML
-using the optional `-config=` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional
+using the optional `-config ` argument. DML scripts can optionally take named arguments (`-nvargs`) or positional
arguments (`-args`). Named arguments are preferred over positional arguments.
Positional arguments are considered
to be deprecated. All the primary algorithm scripts included with SystemML use
named arguments.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/standalone-guide.md
----------------------------------------------------------------------
diff --git a/docs/standalone-guide.md b/docs/standalone-guide.md
index 2c2092d..586e56e 100644
--- a/docs/standalone-guide.md
+++ b/docs/standalone-guide.md
@@ -605,5 +605,5 @@ script (`runStandaloneSystemML.sh` or `runStandaloneSystemML.bat`) to increase
the memory available to the JVM, i.e:
java -Xmx16g -Xms4g -Xmn1g -cp ${CLASSPATH} org.apache.sysml.api.DMLScript
\
- -f ${SCRIPT_FILE} -exec singlenode -config=SystemML-config.xml \
+ -f ${SCRIPT_FILE} -exec singlenode -config SystemML-config.xml \
$@
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/docs/troubleshooting-guide.md
----------------------------------------------------------------------
diff --git a/docs/troubleshooting-guide.md b/docs/troubleshooting-guide.md
index 629bcf5..4731f51 100644
--- a/docs/troubleshooting-guide.md
+++ b/docs/troubleshooting-guide.md
@@ -91,7 +91,7 @@ They can also be configured on a **per SystemML-task basis** by inserting the fo
Note: The default `SystemML-config.xml` is located in `<path to SystemML
root>/conf/`. It is passed to SystemML using the `-config` argument:
- hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config=<config_filename>) ([-args | -nvargs] <args-list>)
+ hadoop jar SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>)
See [Invoking SystemML in Hadoop Batch Mode](hadoop-batch-mode.html) for
details of the syntax.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/346d1c01/scripts/sparkDML.sh
----------------------------------------------------------------------
diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh
index 5548859..7bea639 100755
--- a/scripts/sparkDML.sh
+++ b/scripts/sparkDML.sh
@@ -116,7 +116,7 @@ $SPARK_HOME/bin/spark-submit \
${conf} \
${SYSTEMML_HOME}/SystemML.jar \
-f ${f} \
- -config=${SYSTEMML_HOME}/SystemML-config.xml \
+ -config ${SYSTEMML_HOME}/SystemML-config.xml \
-exec hybrid_spark \
$explain \
$stats \