Repository: systemml
Updated Branches:
  refs/heads/master bfb30b3af -> 0505fd38c
[SYSTEMML-1929] Update Spark parameters in sparkDML.sh and docs Closes #670. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0505fd38 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0505fd38 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0505fd38 Branch: refs/heads/master Commit: 0505fd38c3191551a14c9b21314b0c3432b47e2f Parents: bfb30b3 Author: Glenn Weidner <[email protected]> Authored: Sat Oct 7 17:22:18 2017 -0700 Committer: Glenn Weidner <[email protected]> Committed: Sat Oct 7 17:22:18 2017 -0700 ---------------------------------------------------------------------- docs/algorithms-classification.md | 88 ++++++++++---------- docs/algorithms-clustering.md | 28 +++---- docs/algorithms-descriptive-statistics.md | 28 +++---- docs/algorithms-matrix-factorization.md | 36 ++++---- docs/algorithms-regression.md | 72 ++++++++-------- docs/algorithms-survival-analysis.md | 32 +++---- docs/spark-batch-mode.md | 8 +- docs/spark-mlcontext-programming-guide.md | 4 +- .../examples/mnist_lenet_distrib_sgd-train.dml | 2 +- scripts/perftest/python/run_perftest.py | 1 - scripts/sparkDML.sh | 17 ++-- src/main/resources/scripts/sparkDML.sh | 17 ++-- 12 files changed, 169 insertions(+), 164 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-classification.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-classification.md b/docs/algorithms-classification.md index 1895103..62e40e7 100644 --- a/docs/algorithms-classification.md +++ b/docs/algorithms-classification.md @@ -160,9 +160,9 @@ val prediction = model.transform(X_test_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml -config SystemML-config.xml @@ -331,9 +331,9 @@ prediction.show() Log=/user/ml/log.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f MultiLogReg.dml -config SystemML-config.xml @@ -527,9 +527,9 @@ val model = svm.fit(X_train_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml -config SystemML-config.xml @@ -574,9 +574,9 @@ val prediction = model.transform(X_test_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml -config SystemML-config.xml @@ -658,9 +658,9 @@ more details on the Python API. 
Log=/user/ml/Log.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm.dml -config SystemML-config.xml @@ -692,9 +692,9 @@ more details on the Python API. confusion=/user/ml/confusion.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f l2-svm-predict.dml -config SystemML-config.xml @@ -797,9 +797,9 @@ val model = svm.fit(X_train_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f m-svm.dml -config SystemML-config.xml @@ -844,9 +844,9 @@ val prediction = model.transform(X_test_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f m-svm-predict.dml -config SystemML-config.xml @@ -1009,9 +1009,9 @@ prediction.show() Log=/user/ml/Log.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f m-svm.dml -config SystemML-config.xml @@ -1043,9 +1043,9 @@ prediction.show() confusion=/user/ml/confusion.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f m-svm-predict.dml -config SystemML-config.xml @@ -1148,9 +1148,9 @@ val model = nb.fit(X_train_df) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes.dml -config SystemML-config.xml @@ -1193,9 +1193,9 @@ val prediction = model.transform(X_test_df) probabilities=[file] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes-predict.dml -config SystemML-config.xml @@ -1284,9 +1284,9 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted') accuracy=/user/ml/accuracy.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes.dml -config SystemML-config.xml @@ -1316,9 +1316,9 @@ metrics.f1_score(newsgroups_test.target, pred, average='weighted') confusion=/user/ml/confusion.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + 
$SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f naive-bayes-predict.dml -config SystemML-config.xml @@ -1415,9 +1415,9 @@ implementation is well-suited to handle large-scale data and builds a fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree.dml -config SystemML-config.xml @@ -1453,9 +1453,9 @@ implementation is well-suited to handle large-scale data and builds a fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree-predict.dml -config SystemML-config.xml @@ -1553,9 +1553,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree.dml -config SystemML-config.xml @@ -1588,9 +1588,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f decision-tree-predict.dml -config SystemML-config.xml @@ -1823,9 +1823,9 @@ for classification in parallel. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f random-forest.dml -config SystemML-config.xml @@ -1866,9 +1866,9 @@ for classification in parallel. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f random-forest-predict.dml -config SystemML-config.xml @@ -1989,9 +1989,9 @@ SystemML Language Reference for details. 
fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f random-forest.dml -config SystemML-config.xml @@ -2027,9 +2027,9 @@ To compute predictions: fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f random-forest-predict.dml -config SystemML-config.xml http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-clustering.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-clustering.md b/docs/algorithms-clustering.md index 7554660..358a53a 100644 --- a/docs/algorithms-clustering.md +++ b/docs/algorithms-clustering.md @@ -129,9 +129,9 @@ apart is a "false negative" etc. verb=[boolean] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml -config SystemML-config.xml @@ -163,9 +163,9 @@ apart is a "false negative" etc. O=[file] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml -config SystemML-config.xml @@ -255,9 +255,9 @@ standard output fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml -config SystemML-config.xml @@ -284,9 +284,9 @@ standard output verb=1 </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans.dml -config SystemML-config.xml @@ -317,9 +317,9 @@ To predict Y given X and C: O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml -config SystemML-config.xml @@ -343,9 +343,9 @@ given X and C: O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml -config SystemML-config.xml @@ -368,9 +368,9 @@ labels prY: O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Kmeans-predict.dml -config SystemML-config.xml 
http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-descriptive-statistics.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-descriptive-statistics.md b/docs/algorithms-descriptive-statistics.md index f45ffae..1c86368 100644 --- a/docs/algorithms-descriptive-statistics.md +++ b/docs/algorithms-descriptive-statistics.md @@ -125,9 +125,9 @@ to compute the mean of a categorical attribute like “Hair Color”. STATS=<file> </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Univar-Stats.dml -config SystemML-config.xml @@ -164,9 +164,9 @@ be stored. The format of the output matrix is defined by STATS=/user/ml/stats.mtx </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Univar-Stats.dml -config SystemML-config.xml @@ -585,9 +585,9 @@ attributes like “Hair Color”. OUTDIR=<directory> </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f bivar-stats.dml -config SystemML-config.xml @@ -654,9 +654,9 @@ are defined in [**Table 2**](algorithms-descriptive-statistics.html#table2). OUTDIR=/user/ml/stats.mtx </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f bivar-stats.dml -config SystemML-config.xml @@ -1147,9 +1147,9 @@ becomes reversed and amplified (from $+0.1$ to $-0.5$) if we ignore the months. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml -config SystemML-config.xml @@ -1355,9 +1355,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml -config SystemML-config.xml @@ -1383,9 +1383,9 @@ SystemML Language Reference for details. 
O=/user/ml/Out.mtx </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f stratstats.dml -config SystemML-config.xml http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-matrix-factorization.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-matrix-factorization.md b/docs/algorithms-matrix-factorization.md index 8777130..b559cb5 100644 --- a/docs/algorithms-matrix-factorization.md +++ b/docs/algorithms-matrix-factorization.md @@ -56,9 +56,9 @@ top-$K$ (for a given value of $K$) principal components. OUTPUT=<file> </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml -config SystemML-config.xml @@ -119,9 +119,9 @@ SystemML Language Reference for details. OUTPUT=/user/ml/pca_output/ </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml -config SystemML-config.xml @@ -149,9 +149,9 @@ SystemML Language Reference for details. OUTPUT=/user/ml/test_output.mtx </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f PCA.dml -config SystemML-config.xml @@ -257,9 +257,9 @@ problems. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS.dml -config SystemML-config.xml @@ -291,9 +291,9 @@ problems. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS_predict.dml -config SystemML-config.xml @@ -322,9 +322,9 @@ problems. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS_topk_predict.dml -config SystemML-config.xml @@ -431,9 +431,9 @@ SystemML Language Reference for details. 
fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS.dml -config SystemML-config.xml @@ -467,9 +467,9 @@ To compute predicted ratings for a given list of users and items: fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS_predict.dml -config SystemML-config.xml @@ -501,9 +501,9 @@ predicted ratings for a given list of users: fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f ALS_topk_predict.dml -config SystemML-config.xml http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-regression.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-regression.md b/docs/algorithms-regression.md index df2ad3e..18640b8 100644 --- a/docs/algorithms-regression.md +++ b/docs/algorithms-regression.md @@ -102,9 +102,9 @@ y_test = lr.fit(df_train) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegDS.dml -config SystemML-config.xml @@ -147,9 +147,9 @@ y_test = lr.fit(df_train) fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegCG.dml -config SystemML-config.xml @@ -254,9 +254,9 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - reg=1.0 </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegDS.dml -config SystemML-config.xml @@ -311,9 +311,9 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - Log=/user/ml/log.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f LinearRegCG.dml -config SystemML-config.xml @@ -552,9 +552,9 @@ lowest AIC is computed. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f StepLinearRegDS.dml -config SystemML-config.xml @@ -623,9 +623,9 @@ SystemML Language Reference for details. 
fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f StepLinearRegDS.dml -config SystemML-config.xml @@ -755,9 +755,9 @@ distributions and link functions, see below for details. mii=[int] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM.dml -config SystemML-config.xml @@ -893,9 +893,9 @@ if no maximum limit provided Log=/user/ml/log.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM.dml -config SystemML-config.xml @@ -1230,9 +1230,9 @@ distribution family is supported (see below for details). fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f StepGLM.dml -config SystemML-config.xml @@ -1335,9 +1335,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f StepGLM.dml -config SystemML-config.xml @@ -1481,9 +1481,9 @@ this step outside the scope of `GLM-predict.dml` for now. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1620,9 +1620,9 @@ unknown (which sets it to `1.0`). O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1656,9 +1656,9 @@ unknown (which sets it to `1.0`). fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1690,9 +1690,9 @@ unknown (which sets it to `1.0`). O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1725,9 +1725,9 @@ unknown (which sets it to `1.0`). 
O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1758,9 +1758,9 @@ unknown (which sets it to `1.0`). O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1793,9 +1793,9 @@ unknown (which sets it to `1.0`). O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml @@ -1832,9 +1832,9 @@ unknown (which sets it to `1.0`). O=/user/ml/stats.csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f GLM-predict.dml -config SystemML-config.xml http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/algorithms-survival-analysis.md ---------------------------------------------------------------------- diff --git a/docs/algorithms-survival-analysis.md b/docs/algorithms-survival-analysis.md index 239ab08..943d4d7 100644 --- a/docs/algorithms-survival-analysis.md +++ b/docs/algorithms-survival-analysis.md @@ -57,9 +57,9 @@ censored and uncensored survival times. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml -config SystemML-config.xml @@ -152,9 +152,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml -config SystemML-config.xml @@ -189,9 +189,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f KM.dml -config SystemML-config.xml @@ -461,9 +461,9 @@ may be categorical (ordinal or nominal) as well as continuous-valued. fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml -config SystemML-config.xml @@ -503,9 +503,9 @@ may be categorical (ordinal or nominal) as well as continuous-valued. 
fmt=[format] </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Cox-predict.dml -config SystemML-config.xml @@ -612,9 +612,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml -config SystemML-config.xml @@ -651,9 +651,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Cox.dml -config SystemML-config.xml @@ -691,9 +691,9 @@ SystemML Language Reference for details. fmt=csv </div> <div data-lang="Spark" markdown="1"> - $SPARK_HOME/bin/spark-submit --master yarn-cluster + $SPARK_HOME/bin/spark-submit --master yarn + --deploy-mode cluster --conf spark.driver.maxResultSize=0 - --conf spark.akka.frameSize=128 SystemML.jar -f Cox-predict.dml -config SystemML-config.xml http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/spark-batch-mode.md ---------------------------------------------------------------------- diff --git a/docs/spark-batch-mode.md b/docs/spark-batch-mode.md index 7f8f4c0..349f17c 100644 --- a/docs/spark-batch-mode.md +++ b/docs/spark-batch-mode.md @@ -41,7 +41,7 @@ mode in more depth. # Spark Batch Mode Invocation Syntax -SystemML can be invoked in Hadoop Batch mode using the following syntax: +SystemML can be invoked in Spark Batch mode using the following syntax: spark-submit SystemML.jar [-? | -help | -f <filename>] (-config <config_filename>) ([-args | -nvargs] <args-list>) @@ -63,7 +63,7 @@ to be deprecated. All the primary algorithm scripts included with SystemML use n # Execution modes SystemML works seamlessly with all Spark execution modes, including *local* (`--master local[*]`), -*yarn client* (`--master yarn-client`), *yarn cluster* (`--master yarn-cluster`), *etc*. More +*yarn client* (`--master yarn --deploy-mode client`), *yarn cluster* (`--master yarn --deploy-mode cluster`), *etc*. More information on Spark cluster execution modes can be found on the [official Spark cluster deployment documentation](https://spark.apache.org/docs/latest/cluster-overview.html). *Note* that Spark can be easily run on a laptop in local mode using the `--master local[*]` described @@ -71,8 +71,8 @@ above, which SystemML supports. # Recommended Spark Configuration Settings -For best performance, we recommend setting the following flags when running SystemML with Spark: -`--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128`. +For best performance, we recommend setting the following configuration value when running SystemML with Spark: +`--conf spark.driver.maxResultSize=0`. 
# Examples http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/docs/spark-mlcontext-programming-guide.md ---------------------------------------------------------------------- diff --git a/docs/spark-mlcontext-programming-guide.md b/docs/spark-mlcontext-programming-guide.md index e935c65..63e48be 100644 --- a/docs/spark-mlcontext-programming-guide.md +++ b/docs/spark-mlcontext-programming-guide.md @@ -2814,5 +2814,5 @@ plt.title('PNMF Training Loss') # Recommended Spark Configuration Settings -For best performance, we recommend setting the following flags when running SystemML with Spark: -`--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128`. +For best performance, we recommend setting the following configuration value when running SystemML with Spark: +`--conf spark.driver.maxResultSize=0`. http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml b/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml index c397c1f..7243f6a 100644 --- a/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml +++ b/scripts/nn/examples/mnist_lenet_distrib_sgd-train.dml @@ -62,7 +62,7 @@ # 2. Execute using Spark # ``` # spark-submit --master local[*] --driver-memory 10G -# --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128 +# --conf spark.driver.maxResultSize=0 # $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet_distrib_sgd-train.dml # -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv # C=1 Hin=28 Win=28 K=10 batch_size=32 parallel_batches=4 epochs=10 http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/perftest/python/run_perftest.py ---------------------------------------------------------------------- diff --git a/scripts/perftest/python/run_perftest.py b/scripts/perftest/python/run_perftest.py index 7fd40ec..6c016a8 100755 --- a/scripts/perftest/python/run_perftest.py +++ b/scripts/perftest/python/run_perftest.py @@ -299,7 +299,6 @@ if __name__ == '__main__': # Default Conf default_conf = 'spark.driver.maxResultSize=0 ' \ - 'spark.akka.frameSize=128 ' \ 'spark.network.timeout=6000s ' \ 'spark.rpc.askTimeout=6000s ' \ 'spark.memory.useLegacyMode=true ' \ http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/scripts/sparkDML.sh ---------------------------------------------------------------------- diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh index 7bea639..4a098b1 100755 --- a/scripts/sparkDML.sh +++ b/scripts/sparkDML.sh @@ -39,12 +39,13 @@ fi # Default Values -master="--master yarn-client" +master="--master yarn" +deploy_mode="--deploy-mode client" driver_memory="--driver-memory 20G" num_executors="--num-executors 5" executor_memory="--executor-memory 60G" executor_cores="--executor-cores 24" -conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128" +conf="--conf spark.driver.maxResultSize=0" # error help print @@ -58,19 +59,19 @@ Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS] Examples: $0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50 $0 --driver-memory 5G -f GNMF.dml --explain hops -nvargs ... - $0 --master yarn-cluster -f hdfs:/user/GNMF.dml + $0 --master yarn --deploy-mode cluster -f hdfs:/user/GNMF.dml -h | -? 
Print this usage message and exit SPARK-SUBMIT OPTIONS: --conf <property>=<value> Configuration settings: spark.driver.maxResultSize Default: 0 - spark.akka.frameSize Default: 128 - --driver-memory <num> Memory for driver (e.g. 512M)] Default: 20G - --master <string> local | yarn-client | yarn-cluster] Default: yarn-client + --driver-memory <num> Memory for driver (e.g. 512M) Default: 20G + --master <string> local | yarn Default: yarn + --deploy-mode <string> client | cluster Default: client --num-executors <num> Number of executors to launch (e.g. 2) Default: 5 --executor-memory <num> Memory per executor (e.g. 1G) Default: 60G - --executor-cores <num> Memory per executor (e.g. ) Default: 24 + --executor-cores <num> Number of cores per executor (e.g. 1) Default: 24 -f DML script file name, e.g. hdfs:/user/biadmin/test.dml @@ -90,6 +91,7 @@ while true ; do case "$1" in -h) printUsageExit ; exit 1 ;; --master) master="--master "$2 ; shift 2 ;; + --deploy-mode) deploy_mode="--deploy-mode "$2 ; shift 2 ;; --driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;; --num-executors) num_executors="--num-executors "$2 ; shift 2 ;; --executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;; @@ -109,6 +111,7 @@ done $SPARK_HOME/bin/spark-submit \ ${master} \ + ${deploy_mode} \ ${driver_memory} \ ${num_executors} \ ${executor_memory} \ http://git-wip-us.apache.org/repos/asf/systemml/blob/0505fd38/src/main/resources/scripts/sparkDML.sh ---------------------------------------------------------------------- diff --git a/src/main/resources/scripts/sparkDML.sh b/src/main/resources/scripts/sparkDML.sh index a68d34a..1f1bdd6 100644 --- a/src/main/resources/scripts/sparkDML.sh +++ b/src/main/resources/scripts/sparkDML.sh @@ -39,12 +39,13 @@ fi # Default Values -master="--master yarn-client" +master="--master yarn" +deploy_mode="--deploy-mode client" driver_memory="--driver-memory 20G" num_executors="--num-executors 5" executor_memory="--executor-memory 60G" executor_cores="--executor-cores 24" -conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128" +conf="--conf spark.driver.maxResultSize=0" # error help print @@ -58,19 +59,19 @@ Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS] Examples: $0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50 $0 --driver-memory 5G -f GNMF.dml --explain hops -nvargs ... - $0 --master yarn-cluster -f hdfs:/user/GNMF.dml + $0 --master yarn --deploy-mode cluster -f hdfs:/user/GNMF.dml -h | -? Print this usage message and exit SPARK-SUBMIT OPTIONS: --conf <property>=<value> Configuration settings: spark.driver.maxResultSize Default: 0 - spark.akka.frameSize Default: 128 - --driver-memory <num> Memory for driver (e.g. 512M)] Default: 20G - --master <string> local | yarn-client | yarn-cluster] Default: yarn-client + --driver-memory <num> Memory for driver (e.g. 512M) Default: 20G + --master <string> local | yarn Default: yarn + --deploy-mode <string> client | cluster Default: client --num-executors <num> Number of executors to launch (e.g. 2) Default: 5 --executor-memory <num> Memory per executor (e.g. 1G) Default: 60G - --executor-cores <num> Memory per executor (e.g. ) Default: 24 + --executor-cores <num> Number of cores per executor (e.g. 1) Default: 24 -f DML script file name, e.g. 
hdfs:/user/biadmin/test.dml @@ -90,6 +91,7 @@ while true ; do case "$1" in -h) printUsageExit ; exit 1 ;; --master) master="--master "$2 ; shift 2 ;; + --deploy-mode) deploy_mode="--deploy-mode "$2 ; shift 2 ;; --driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;; --num-executors) num_executors="--num-executors "$2 ; shift 2 ;; --executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;; @@ -109,6 +111,7 @@ done $SPARK_HOME/bin/spark-submit \ ${master} \ + ${deploy_mode} \ ${driver_memory} \ ${num_executors} \ ${executor_memory} \
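
For anyone updating hand-written submit commands to match this change, here is a minimal sketch of the new-style invocation assembled from the documentation examples above. It assumes SystemML.jar and SystemML-config.xml are in the working directory; Univar-Stats.dml is used only as an illustrative target, and the -nvargs values are placeholders rather than that script's full parameter list:

    $SPARK_HOME/bin/spark-submit \
      --master yarn \
      --deploy-mode cluster \
      --conf spark.driver.maxResultSize=0 \
      SystemML.jar \
      -f Univar-Stats.dml \
      -config SystemML-config.xml \
      -nvargs X=/user/ml/X.mtx STATS=/user/ml/stats.mtx

The updated sparkDML.sh wrapper exposes the same split between master and deploy mode; reusing the example from its own usage message:

    ./sparkDML.sh --master yarn --deploy-mode cluster \
      -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50

The --master yarn-cluster form is replaced because Spark 2.x deprecates it in favor of --master yarn with an explicit --deploy-mode, and spark.akka.frameSize is dropped because the Akka-based transport (and that setting with it) no longer exists in Spark 2.x.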
