[3/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda
Add the news about spark-summit-eu-2017 agenda Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/35eb1471 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/35eb1471 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/35eb1471 Branch: refs/heads/asf-site Commit: 35eb1471704a97c18e96b46f2495a7117565466d Parents: cca972e Author: Yin Huai Authored: Mon Aug 28 22:40:10 2017 + Committer: Yin Huai Committed: Mon Aug 28 15:54:26 2017 -0700 -- ...-08-28-spark-summit-eu-2017-agenda-posted.md | 17 ++ site/committers.html| 6 +- site/community.html | 6 +- site/contributing.html | 6 +- site/developer-tools.html | 6 +- site/documentation.html | 6 +- site/downloads.html | 6 +- site/examples.html | 6 +- site/faq.html | 6 +- site/graphx/index.html | 6 +- site/improvement-proposals.html | 6 +- site/index.html | 6 +- site/mailing-lists.html | 6 +- site/mllib/index.html | 6 +- site/news/amp-camp-2013-registration-ope.html | 6 +- .../news/announcing-the-first-spark-summit.html | 6 +- .../news/fourth-spark-screencast-published.html | 6 +- site/news/index.html| 16 +- site/news/nsdi-paper.html | 6 +- site/news/one-month-to-spark-summit-2015.html | 6 +- .../proposals-open-for-spark-summit-east.html | 6 +- ...registration-open-for-spark-summit-east.html | 6 +- .../news/run-spark-and-shark-on-amazon-emr.html | 6 +- site/news/spark-0-6-1-and-0-5-2-released.html | 6 +- site/news/spark-0-6-2-released.html | 6 +- site/news/spark-0-7-0-released.html | 6 +- site/news/spark-0-7-2-released.html | 6 +- site/news/spark-0-7-3-released.html | 6 +- site/news/spark-0-8-0-released.html | 6 +- site/news/spark-0-8-1-released.html | 6 +- site/news/spark-0-9-0-released.html | 6 +- site/news/spark-0-9-1-released.html | 6 +- site/news/spark-0-9-2-released.html | 6 +- site/news/spark-1-0-0-released.html | 6 +- site/news/spark-1-0-1-released.html | 6 +- site/news/spark-1-0-2-released.html | 6 +- 
site/news/spark-1-1-0-released.html | 6 +- site/news/spark-1-1-1-released.html | 6 +- site/news/spark-1-2-0-released.html | 6 +- site/news/spark-1-2-1-released.html | 6 +- site/news/spark-1-2-2-released.html | 6 +- site/news/spark-1-3-0-released.html | 6 +- site/news/spark-1-4-0-released.html | 6 +- site/news/spark-1-4-1-released.html | 6 +- site/news/spark-1-5-0-released.html | 6 +- site/news/spark-1-5-1-released.html | 6 +- site/news/spark-1-5-2-released.html | 6 +- site/news/spark-1-6-0-released.html | 6 +- site/news/spark-1-6-1-released.html | 6 +- site/news/spark-1-6-2-released.html | 6 +- site/news/spark-1-6-3-released.html | 6 +- site/news/spark-2-0-0-released.html | 6 +- site/news/spark-2-0-1-released.html | 6 +- site/news/spark-2-0-2-released.html | 6 +- site/news/spark-2-1-0-released.html | 6 +- site/news/spark-2-1-1-released.html | 6 +- site/news/spark-2-2-0-released.html | 6 +- site/news/spark-2.0.0-preview.html | 6 +- .../spark-accepted-into-apache-incubator.html | 6 +- site/news/spark-and-shark-in-the-news.html | 6 +- site/news/spark-becomes-tlp.html| 6 +- site/news/spark-featured-in-wired.html | 6 +- .../spark-mailing-lists-moving-to-apache.html | 6 +- site/news/spark-meetups.html| 6 +- site/news/spark-screencasts-published.html | 6 +- site/news/spark-summit-2013-is-a-wrap.html | 6 +- site/news/spark-summit-2014-videos-posted.html | 6 +- site/news/spark-summit-2015-videos-posted.html | 6 +- site/news/spark-summit-agenda-posted.html | 6 +- .../spark-summit-east-2015-videos-posted.html | 6 +- .../spark-summit-east-2016-cfp-closing.html | 6 +- .../spark-summit-east-2017-agenda-posted.html | 6 +- site/news/spark-summit-east-agenda-posted.html | 6 +- .../spark-summit-eu-2017-agenda-posted.html | 223 +++ .../news/spark-summit-europe-agenda-posted.html | 6 +- sit
[1/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda
Repository: spark-website Updated Branches: refs/heads/asf-site cca972e7f -> 35eb14717 http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-3-0.html -- diff --git a/site/releases/spark-release-1-3-0.html b/site/releases/spark-release-1-3-0.html index 10d934b..5e4d302 100644 --- a/site/releases/spark-release-1-3-0.html +++ b/site/releases/spark-release-1-3-0.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-3-1.html -- diff --git a/site/releases/spark-release-1-3-1.html b/site/releases/spark-release-1-3-1.html index 7df8028..116898f 100644 --- a/site/releases/spark-release-1-3-1.html +++ b/site/releases/spark-release-1-3-1.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-4-0.html -- diff --git a/site/releases/spark-release-1-4-0.html b/site/releases/spark-release-1-4-0.html index 143cc17..b75a496 100644 --- a/site/releases/spark-release-1-4-0.html +++ b/site/releases/spark-release-1-4-0.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 
31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-4-1.html -- diff --git a/site/releases/spark-release-1-4-1.html b/site/releases/spark-release-1-4-1.html index ccdd161..30b92fd 100644 --- a/site/releases/spark-release-1-4-1.html +++ b/site/releases/spark-release-1-4-1.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-5-0.html -- diff --git a/site/releases/spark-release-1-5-0.html b/site/releases/spark-release-1-5-0.html index f73ab5d..6e1411d 100644 --- a/site/releases/spark-release-1-5-0.html +++ b/site/releases/spark-release-1-5-0.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-5-1.html -- diff --git a/site/releases/spark-release-1-5-1.html b/site/releases/spark-release-1-5-1.html index 3af892e..b447dd7 100644 --- a/site/releases/spark-release-1-5-1.html +++ b/site/releases/spark-release-1-5
[2/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda
http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-accepted-into-apache-incubator.html -- diff --git a/site/news/spark-accepted-into-apache-incubator.html b/site/news/spark-accepted-into-apache-incubator.html index 62638f2..a4a913f 100644 --- a/site/news/spark-accepted-into-apache-incubator.html +++ b/site/news/spark-accepted-into-apache-incubator.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-and-shark-in-the-news.html -- diff --git a/site/news/spark-and-shark-in-the-news.html b/site/news/spark-and-shark-in-the-news.html index 4a0c4fc..55d2ade 100644 --- a/site/news/spark-and-shark-in-the-news.html +++ b/site/news/spark-and-shark-in-the-news.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-becomes-tlp.html -- diff --git a/site/news/spark-becomes-tlp.html b/site/news/spark-becomes-tlp.html index 6c76d20..0f17857 100644 --- a/site/news/spark-becomes-tlp.html +++ b/site/news/spark-becomes-tlp.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 
7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-featured-in-wired.html -- diff --git a/site/news/spark-featured-in-wired.html b/site/news/spark-featured-in-wired.html index 1d35e40..1c0b69a 100644 --- a/site/news/spark-featured-in-wired.html +++ b/site/news/spark-featured-in-wired.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-mailing-lists-moving-to-apache.html -- diff --git a/site/news/spark-mailing-lists-moving-to-apache.html b/site/news/spark-mailing-lists-moving-to-apache.html index b586b65..4e12162 100644 --- a/site/news/spark-mailing-lists-moving-to-apache.html +++ b/site/news/spark-mailing-lists-moving-to-apache.html @@ -161,6 +161,9 @@ Latest News + Spark Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted + (Aug 28, 2017) + Spark 2.2.0 released (Jul 11, 2017) @@ -170,9 +173,6 @@ Spark Summit (June 5-7th, 2017, San Francisco) agenda posted (Mar 31, 2017) - Spark Summit East (Feb 7-9th, 2017, Boston) agenda posted - (Jan 04, 2017) - Archive http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-meetups.html -- diff --git a/site/news/spark-meetups.html b/site/news/spark-meetups.html index 4de6525..92da537 100644 --- a/site/news/spark-meetups.html +++ b/site/news/spark-meetups.html @@ -161,6 +161,
spark git commit: [SPARK-17139][ML] Add model summary for MultinomialLogisticRegression
Repository: spark Updated Branches: refs/heads/master 73e64f7d5 -> c7270a46f [SPARK-17139][ML] Add model summary for MultinomialLogisticRegression ## What changes were proposed in this pull request? Add 4 traits, using the following hierarchy: LogisticRegressionSummary LogisticRegressionTrainingSummary: LogisticRegressionSummary BinaryLogisticRegressionSummary: LogisticRegressionSummary BinaryLogisticRegressionTrainingSummary: LogisticRegressionTrainingSummary, BinaryLogisticRegressionSummary. Public methods such as `def summary` return only the trait types listed above. Then implement 4 concrete classes: LogisticRegressionSummaryImpl (multiclass case) LogisticRegressionTrainingSummaryImpl (multiclass case) BinaryLogisticRegressionSummaryImpl (binary case). BinaryLogisticRegressionTrainingSummaryImpl (binary case). ## How was this patch tested? Existing tests & added tests. Author: WeichenXu Closes #15435 from WeichenXu123/mlor_summary. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c7270a46 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c7270a46 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c7270a46 Branch: refs/heads/master Commit: c7270a46fc340db62c87ddfc6568603d0b832845 Parents: 73e64f7 Author: Weichen Xu Authored: Mon Aug 28 13:31:01 2017 -0700 Committer: Joseph K. 
Bradley Committed: Mon Aug 28 13:31:01 2017 -0700 -- .../ml/classification/LogisticRegression.scala | 340 +++ .../LogisticRegressionSuite.scala | 160 +++-- .../ml/regression/LinearRegressionSuite.scala | 2 +- project/MimaExcludes.scala | 21 +- 4 files changed, 412 insertions(+), 111 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c7270a46/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 21957d9..ffe4b52 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -22,7 +22,7 @@ import java.util.Locale import scala.collection.mutable import breeze.linalg.{DenseVector => BDV} -import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path import org.apache.spark.SparkException @@ -35,7 +35,7 @@ import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ -import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics +import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics} import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.mllib.util.MLUtils @@ -882,21 +882,28 @@ class LogisticRegression @Since("1.2.0") ( val model = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, interceptVector, numClasses, isMultinomial)) -// TODO: implement summary model 
for multinomial case -val m = if (!isMultinomial) { - val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() - val logRegSummary = new BinaryLogisticRegressionTrainingSummary( + +val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() +val logRegSummary = if (numClasses <= 2) { + new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, +predictionColName, $(labelCol), $(featuresCol), objectiveHistory) - model.setSummary(Some(logRegSummary)) } else { - model + new LogisticRegressionTrainingSummaryImpl( +summaryModel.transform(dataset), +probabilityColName, +predictionColName, +$(labelCol), +$(featuresCol), +objectiveHistory) } -instr.logSuccess(m) -m +model.setSummary(Some(logRegSummary)) +instr.logSuccess(model) +model } @Since("1.4.0") @@ -1010,8 +1017,8 @@ class LogisticRegressionModel private[spark] ( private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None /** - * Gets summary of model on training set. An exception is - * thrown if `trainingSumm
spark git commit: [SPARK-19662][SCHEDULER][TEST] Add Fair Scheduler Unit Test coverage for different build cases
Repository: spark Updated Branches: refs/heads/master 24e6c187f -> 73e64f7d5 [SPARK-19662][SCHEDULER][TEST] Add Fair Scheduler Unit Test coverage for different build cases ## What changes were proposed in this pull request? Fair Scheduler can be built via one of the following options: - By setting a `spark.scheduler.allocation.file` property, - By putting `fairscheduler.xml` on the classpath. These options are checked **in order** and the fair scheduler is built via the first option found. If an invalid path is given, a `FileNotFoundException` is expected. This PR adds unit test coverage for these use cases, and a minor documentation change has been added for the second option (`fairscheduler.xml` on the classpath) to inform users. Also, this PR is related to #16813 and has been created separately to keep the patch content isolated and to help the reviewers. ## How was this patch tested? Added new Unit Tests. Author: erenavsarogullari Closes #16992 from erenavsarogullari/SPARK-19662. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73e64f7d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73e64f7d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73e64f7d Branch: refs/heads/master Commit: 73e64f7d50ba7a8469bd76f97e0a22fad41c2caa Parents: 24e6c18 Author: erenavsarogullari Authored: Mon Aug 28 14:54:00 2017 -0500 Committer: Imran Rashid Committed: Mon Aug 28 14:54:00 2017 -0500 -- .../resources/fairscheduler-with-valid-data.xml | 35 .../org/apache/spark/scheduler/PoolSuite.scala | 44 docs/job-scheduling.md | 2 +- 3 files changed, 80 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/73e64f7d/core/src/test/resources/fairscheduler-with-valid-data.xml -- diff --git a/core/src/test/resources/fairscheduler-with-valid-data.xml b/core/src/test/resources/fairscheduler-with-valid-data.xml new file mode 100644 index 000..3d88233 --- /dev/null +++ 
b/core/src/test/resources/fairscheduler-with-valid-data.xml @@ -0,0 +1,35 @@ + + + + + +3 +1 +FIFO + + +4 +2 +FAIR + + +2 +3 +FAIR + + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark/blob/73e64f7d/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala index 4901062..5bd3955 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.scheduler +import java.io.FileNotFoundException import java.util.Properties import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite} @@ -292,6 +293,49 @@ class PoolSuite extends SparkFunSuite with LocalSparkContext { } } + test("Fair Scheduler should build fair scheduler when " + +"valid spark.scheduler.allocation.file property is set") { +val xmlPath = getClass.getClassLoader.getResource("fairscheduler-with-valid-data.xml").getFile() +val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, xmlPath) +sc = new SparkContext(LOCAL, APP_NAME, conf) + +val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0) +val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf) +schedulableBuilder.buildPools() + +verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO) +verifyPool(rootPool, "pool1", 3, 1, FIFO) +verifyPool(rootPool, "pool2", 4, 2, FAIR) +verifyPool(rootPool, "pool3", 2, 3, FAIR) + } + + test("Fair Scheduler should use default file(fairscheduler.xml) if it exists in classpath " + +"and spark.scheduler.allocation.file property is not set") { +val conf = new SparkConf() +sc = new SparkContext(LOCAL, APP_NAME, conf) + +val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0) +val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf) +schedulableBuilder.buildPools() + 
+verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO) +verifyPool(rootPool, "1", 2, 1, FIFO) +verifyPool(rootPool, "2", 3, 1, FIFO) +verifyPool(rootPool, "3", 0, 1, FIFO) + } + + test("Fair Scheduler should throw FileNotFoundException " + +"when invalid spark.scheduler.allocation.file property is set") { +val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, "INVALID_FILE_PATH") +sc = new SparkContext(LO
spark git commit: [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server
Repository: spark Updated Branches: refs/heads/branch-2.2 0d4ef2f69 -> 59bb7ebfb [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server History Server Launch uses SparkClassCommandBuilder for launching the server. It is observed that SPARK_CLASSPATH has been removed and deprecated. For spark-submit this takes a different route and spark.driver.extraClasspath takes care of specifying additional jars in the classpath that were previously specified in the SPARK_CLASSPATH. Right now the only way to specify the additional jars for launching daemons such as history server is using SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html) but this I presume is a distribution classpath. It would be nice to have a similar config like spark.driver.extraClasspath for launching daemons similar to history server. Added new environment variable SPARK_DAEMON_CLASSPATH to set classpath for launching daemons. Tested and verified for History Server and Standalone Mode. ## How was this patch tested? Initially, the history server start script would fail because it could not find the required jars for launching the server in the java classpath. The same was true for running Master and Worker in standalone mode. By adding the environment variable SPARK_DAEMON_CLASSPATH to the java classpath, both kinds of daemons (History Server, Standalone daemons) are starting up and running. Author: pgandhi Author: pgandhi999 Closes #19047 from pgandhi999/master. 
(cherry picked from commit 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36) Signed-off-by: Tom Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59bb7ebf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59bb7ebf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59bb7ebf Branch: refs/heads/branch-2.2 Commit: 59bb7ebfb83c292cea853d6cd6fdf9748baa6ce2 Parents: 0d4ef2f Author: pgandhi Authored: Mon Aug 28 08:51:22 2017 -0500 Committer: Tom Graves Committed: Mon Aug 28 08:51:49 2017 -0500 -- conf/spark-env.sh.template | 1 + docs/monitoring.md | 4 docs/running-on-mesos.md| 2 ++ docs/spark-standalone.md| 4 .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 + 5 files changed, 16 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/conf/spark-env.sh.template -- diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index b7c985a..0f9150b 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -51,6 +51,7 @@ # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers # Generic options for the daemons used in the standalone deploy mode http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/docs/monitoring.md -- diff --git a/docs/monitoring.md b/docs/monitoring.md index 3e577c5..d22cd94 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -62,6 +62,10 @@ The history server can be configured as follows: JVM options for the history server (default: none). +SPARK_DAEMON_CLASSPATH +Classpath for the history server (default: none). 
+ + SPARK_PUBLIC_DNS The public address for the history server. If this is not set, links to application history http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 847a659..6b69bfc 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -158,6 +158,8 @@ If you like to run the `MesosClusterDispatcher` with Marathon, you need to run t The `MesosClusterDispatcher` also supports writing recovery state into Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover all submitted and running containers on relaunch. In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations. For more information about these configur
spark git commit: [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server
Repository: spark Updated Branches: refs/heads/master 0456b4050 -> 24e6c187f [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server History Server Launch uses SparkClassCommandBuilder for launching the server. It is observed that SPARK_CLASSPATH has been removed and deprecated. For spark-submit this takes a different route and spark.driver.extraClasspath takes care of specifying additional jars in the classpath that were previously specified in the SPARK_CLASSPATH. Right now the only way to specify the additional jars for launching daemons such as history server is using SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html) but this I presume is a distribution classpath. It would be nice to have a similar config like spark.driver.extraClasspath for launching daemons similar to history server. Added new environment variable SPARK_DAEMON_CLASSPATH to set classpath for launching daemons. Tested and verified for History Server and Standalone Mode. ## How was this patch tested? Initially, the history server start script would fail because it could not find the required jars for launching the server in the java classpath. The same was true for running Master and Worker in standalone mode. By adding the environment variable SPARK_DAEMON_CLASSPATH to the java classpath, both kinds of daemons (History Server, Standalone daemons) are starting up and running. Author: pgandhi Author: pgandhi999 Closes #19047 from pgandhi999/master. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24e6c187 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24e6c187 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24e6c187 Branch: refs/heads/master Commit: 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36 Parents: 0456b40 Author: pgandhi Authored: Mon Aug 28 08:51:22 2017 -0500 Committer: Tom Graves Committed: Mon Aug 28 08:51:22 2017 -0500 -- conf/spark-env.sh.template | 1 + docs/monitoring.md | 4 docs/running-on-mesos.md| 2 ++ docs/spark-standalone.md| 4 .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 + 5 files changed, 16 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/conf/spark-env.sh.template -- diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index 1663019..f8c895f 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -52,6 +52,7 @@ # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers # Generic options for the daemons used in the standalone deploy mode http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/docs/monitoring.md -- diff --git a/docs/monitoring.md b/docs/monitoring.md index 3e577c5..d22cd94 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -62,6 +62,10 @@ The history server can be configured as follows: JVM options for the history server (default: none). +SPARK_DAEMON_CLASSPATH +Classpath for the history server (default: none). + + SPARK_PUBLIC_DNS The public address for the history server. 
If this is not set, links to application history http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/docs/running-on-mesos.md -- diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 0e5a20c..c12b858 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -160,6 +160,8 @@ If you like to run the `MesosClusterDispatcher` with Marathon, you need to run t The `MesosClusterDispatcher` also supports writing recovery state into Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover all submitted and running containers on relaunch. In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations. For more information about these configurations please refer to the configurations [doc](configurations.html#deploy). +You can also specify any
spark git commit: [SPARK-21818][ML][MLLIB] Fix bug of MultivariateOnlineSummarizer.variance generate negative result
Repository: spark Updated Branches: refs/heads/branch-2.2 2b4bd7910 -> 0d4ef2f69 [SPARK-21818][ML][MLLIB] Fix bug of MultivariateOnlineSummarizer.variance generate negative result Because of numerical error, MultivariateOnlineSummarizer.variance can produce a negative variance. **This is a serious bug because many algos in MLLib** **use stddev computed from** `sqrt(variance)`**, which will generate NaN and crash the whole algorithm.** We can reproduce this bug using the following code: ``` val summarizer1 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.7) val summarizer2 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.4) val summarizer3 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.5) val summarizer4 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.4) val summarizer = summarizer1 .merge(summarizer2) .merge(summarizer3) .merge(summarizer4) println(summarizer.variance(0)) ``` This PR fixes the bugs in `mllib.stat.MultivariateOnlineSummarizer.variance` and `ml.stat.SummarizerBuffer.variance`, and in several places in `WeightedLeastSquares`. Test cases added. Author: WeichenXu Closes #19029 from WeichenXu123/fix_summarizer_var_bug. 
(cherry picked from commit 0456b4050817e64f27824720e695bbfff738d474) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0d4ef2f6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0d4ef2f6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0d4ef2f6 Branch: refs/heads/branch-2.2 Commit: 0d4ef2f690e378cade0a3ec84d535a535dc20dfc Parents: 2b4bd79 Author: WeichenXu Authored: Mon Aug 28 07:41:42 2017 +0100 Committer: Sean Owen Committed: Mon Aug 28 08:00:29 2017 +0100 -- .../spark/ml/optim/WeightedLeastSquares.scala | 12 +--- .../mllib/stat/MultivariateOnlineSummarizer.scala | 5 +++-- .../stat/MultivariateOnlineSummarizerSuite.scala | 18 ++ 3 files changed, 30 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0d4ef2f6/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala index 56ab967..c5c9c8e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala @@ -440,7 +440,11 @@ private[ml] object WeightedLeastSquares { /** * Weighted population standard deviation of labels. */ -def bStd: Double = math.sqrt(bbSum / wSum - bBar * bBar) +def bStd: Double = { + // We prevent variance from negative value caused by numerical error. + val variance = math.max(bbSum / wSum - bBar * bBar, 0.0) + math.sqrt(variance) +} /** * Weighted mean of (label * features). @@ -471,7 +475,8 @@ private[ml] object WeightedLeastSquares { while (i < triK) { val l = j - 2 val aw = aSum(l) / wSum -std(l) = math.sqrt(aaValues(i) / wSum - aw * aw) +// We prevent variance from negative value caused by numerical error. 
+std(l) = math.sqrt(math.max(aaValues(i) / wSum - aw * aw, 0.0)) i += j j += 1 } @@ -489,7 +494,8 @@ private[ml] object WeightedLeastSquares { while (i < triK) { val l = j - 2 val aw = aSum(l) / wSum -variance(l) = aaValues(i) / wSum - aw * aw +// We prevent variance from negative value caused by numerical error. +variance(l) = math.max(aaValues(i) / wSum - aw * aw, 0.0) i += j j += 1 } http://git-wip-us.apache.org/repos/asf/spark/blob/0d4ef2f6/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 7dc0c45..8121880 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -213,8 +213,9 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S var i = 0 val len = currM2n.length while (i < len) { -realVariance(i) = (currM2n(i) + deltaMean(i) * deltaMean(i) * weightSum(i) * - (totalWeightSum - weightSum(i)) / totalWeightS