[3/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda

2017-08-28 Thread yhuai
Add the news about spark-summit-eu-2017 agenda


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/35eb1471
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/35eb1471
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/35eb1471

Branch: refs/heads/asf-site
Commit: 35eb1471704a97c18e96b46f2495a7117565466d
Parents: cca972e
Author: Yin Huai 
Authored: Mon Aug 28 22:40:10 2017 +
Committer: Yin Huai 
Committed: Mon Aug 28 15:54:26 2017 -0700

--
 ...-08-28-spark-summit-eu-2017-agenda-posted.md |  17 ++
 site/committers.html|   6 +-
 site/community.html |   6 +-
 site/contributing.html  |   6 +-
 site/developer-tools.html   |   6 +-
 site/documentation.html |   6 +-
 site/downloads.html |   6 +-
 site/examples.html  |   6 +-
 site/faq.html   |   6 +-
 site/graphx/index.html  |   6 +-
 site/improvement-proposals.html |   6 +-
 site/index.html |   6 +-
 site/mailing-lists.html |   6 +-
 site/mllib/index.html   |   6 +-
 site/news/amp-camp-2013-registration-ope.html   |   6 +-
 .../news/announcing-the-first-spark-summit.html |   6 +-
 .../news/fourth-spark-screencast-published.html |   6 +-
 site/news/index.html|  16 +-
 site/news/nsdi-paper.html   |   6 +-
 site/news/one-month-to-spark-summit-2015.html   |   6 +-
 .../proposals-open-for-spark-summit-east.html   |   6 +-
 ...registration-open-for-spark-summit-east.html |   6 +-
 .../news/run-spark-and-shark-on-amazon-emr.html |   6 +-
 site/news/spark-0-6-1-and-0-5-2-released.html   |   6 +-
 site/news/spark-0-6-2-released.html |   6 +-
 site/news/spark-0-7-0-released.html |   6 +-
 site/news/spark-0-7-2-released.html |   6 +-
 site/news/spark-0-7-3-released.html |   6 +-
 site/news/spark-0-8-0-released.html |   6 +-
 site/news/spark-0-8-1-released.html |   6 +-
 site/news/spark-0-9-0-released.html |   6 +-
 site/news/spark-0-9-1-released.html |   6 +-
 site/news/spark-0-9-2-released.html |   6 +-
 site/news/spark-1-0-0-released.html |   6 +-
 site/news/spark-1-0-1-released.html |   6 +-
 site/news/spark-1-0-2-released.html |   6 +-
 site/news/spark-1-1-0-released.html |   6 +-
 site/news/spark-1-1-1-released.html |   6 +-
 site/news/spark-1-2-0-released.html |   6 +-
 site/news/spark-1-2-1-released.html |   6 +-
 site/news/spark-1-2-2-released.html |   6 +-
 site/news/spark-1-3-0-released.html |   6 +-
 site/news/spark-1-4-0-released.html |   6 +-
 site/news/spark-1-4-1-released.html |   6 +-
 site/news/spark-1-5-0-released.html |   6 +-
 site/news/spark-1-5-1-released.html |   6 +-
 site/news/spark-1-5-2-released.html |   6 +-
 site/news/spark-1-6-0-released.html |   6 +-
 site/news/spark-1-6-1-released.html |   6 +-
 site/news/spark-1-6-2-released.html |   6 +-
 site/news/spark-1-6-3-released.html |   6 +-
 site/news/spark-2-0-0-released.html |   6 +-
 site/news/spark-2-0-1-released.html |   6 +-
 site/news/spark-2-0-2-released.html |   6 +-
 site/news/spark-2-1-0-released.html |   6 +-
 site/news/spark-2-1-1-released.html |   6 +-
 site/news/spark-2-2-0-released.html |   6 +-
 site/news/spark-2.0.0-preview.html  |   6 +-
 .../spark-accepted-into-apache-incubator.html   |   6 +-
 site/news/spark-and-shark-in-the-news.html  |   6 +-
 site/news/spark-becomes-tlp.html|   6 +-
 site/news/spark-featured-in-wired.html  |   6 +-
 .../spark-mailing-lists-moving-to-apache.html   |   6 +-
 site/news/spark-meetups.html|   6 +-
 site/news/spark-screencasts-published.html  |   6 +-
 site/news/spark-summit-2013-is-a-wrap.html  |   6 +-
 site/news/spark-summit-2014-videos-posted.html  |   6 +-
 site/news/spark-summit-2015-videos-posted.html  |   6 +-
 site/news/spark-summit-agenda-posted.html   |   6 +-
 .../spark-summit-east-2015-videos-posted.html   |   6 +-
 .../spark-summit-east-2016-cfp-closing.html |   6 +-
 .../spark-summit-east-2017-agenda-posted.html   |   6 +-
 site/news/spark-summit-east-agenda-posted.html  |   6 +-
 .../spark-summit-eu-2017-agenda-posted.html | 223 +++
 .../news/spark-summit-europe-agenda-posted.html |   6 +-
 sit

[1/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda

2017-08-28 Thread yhuai
Repository: spark-website
Updated Branches:
  refs/heads/asf-site cca972e7f -> 35eb14717


http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-3-0.html
--
diff --git a/site/releases/spark-release-1-3-0.html 
b/site/releases/spark-release-1-3-0.html
index 10d934b..5e4d302 100644
--- a/site/releases/spark-release-1-3-0.html
+++ b/site/releases/spark-release-1-3-0.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-3-1.html
--
diff --git a/site/releases/spark-release-1-3-1.html 
b/site/releases/spark-release-1-3-1.html
index 7df8028..116898f 100644
--- a/site/releases/spark-release-1-3-1.html
+++ b/site/releases/spark-release-1-3-1.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-4-0.html
--
diff --git a/site/releases/spark-release-1-4-0.html 
b/site/releases/spark-release-1-4-0.html
index 143cc17..b75a496 100644
--- a/site/releases/spark-release-1-4-0.html
+++ b/site/releases/spark-release-1-4-0.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-4-1.html
--
diff --git a/site/releases/spark-release-1-4-1.html 
b/site/releases/spark-release-1-4-1.html
index ccdd161..30b92fd 100644
--- a/site/releases/spark-release-1-4-1.html
+++ b/site/releases/spark-release-1-4-1.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-5-0.html
--
diff --git a/site/releases/spark-release-1-5-0.html 
b/site/releases/spark-release-1-5-0.html
index f73ab5d..6e1411d 100644
--- a/site/releases/spark-release-1-5-0.html
+++ b/site/releases/spark-release-1-5-0.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/releases/spark-release-1-5-1.html
--
diff --git a/site/releases/spark-release-1-5-1.html 
b/site/releases/spark-release-1-5-1.html
index 3af892e..b447dd7 100644
--- a/site/releases/spark-release-1-5-1.html
+++ b/site/releases/spark-release-1-5

[2/3] spark-website git commit: Add the news about spark-summit-eu-2017 agenda

2017-08-28 Thread yhuai
http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-accepted-into-apache-incubator.html
--
diff --git a/site/news/spark-accepted-into-apache-incubator.html 
b/site/news/spark-accepted-into-apache-incubator.html
index 62638f2..a4a913f 100644
--- a/site/news/spark-accepted-into-apache-incubator.html
+++ b/site/news/spark-accepted-into-apache-incubator.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-and-shark-in-the-news.html
--
diff --git a/site/news/spark-and-shark-in-the-news.html 
b/site/news/spark-and-shark-in-the-news.html
index 4a0c4fc..55d2ade 100644
--- a/site/news/spark-and-shark-in-the-news.html
+++ b/site/news/spark-and-shark-in-the-news.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-becomes-tlp.html
--
diff --git a/site/news/spark-becomes-tlp.html b/site/news/spark-becomes-tlp.html
index 6c76d20..0f17857 100644
--- a/site/news/spark-becomes-tlp.html
+++ b/site/news/spark-becomes-tlp.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-featured-in-wired.html
--
diff --git a/site/news/spark-featured-in-wired.html 
b/site/news/spark-featured-in-wired.html
index 1d35e40..1c0b69a 100644
--- a/site/news/spark-featured-in-wired.html
+++ b/site/news/spark-featured-in-wired.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-mailing-lists-moving-to-apache.html
--
diff --git a/site/news/spark-mailing-lists-moving-to-apache.html 
b/site/news/spark-mailing-lists-moving-to-apache.html
index b586b65..4e12162 100644
--- a/site/news/spark-mailing-lists-moving-to-apache.html
+++ b/site/news/spark-mailing-lists-moving-to-apache.html
@@ -161,6 +161,9 @@
   Latest News
   
 
+  Spark 
Summit Europe (October 24-26th, 2017, Dublin, Ireland) agenda posted
+  (Aug 28, 2017)
+
   Spark 2.2.0 
released
   (Jul 11, 2017)
 
@@ -170,9 +173,6 @@
   Spark 
Summit (June 5-7th, 2017, San Francisco) agenda posted
   (Mar 31, 2017)
 
-  Spark 
Summit East (Feb 7-9th, 2017, Boston) agenda posted
-  (Jan 04, 2017)
-
   
   Archive
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/35eb1471/site/news/spark-meetups.html
--
diff --git a/site/news/spark-meetups.html b/site/news/spark-meetups.html
index 4de6525..92da537 100644
--- a/site/news/spark-meetups.html
+++ b/site/news/spark-meetups.html
@@ -161,6 +161,

spark git commit: [SPARK-17139][ML] Add model summary for MultinomialLogisticRegression

2017-08-28 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master 73e64f7d5 -> c7270a46f


[SPARK-17139][ML] Add model summary for MultinomialLogisticRegression

## What changes were proposed in this pull request?

Add 4 traits, using the following hierarchy:
- LogisticRegressionSummary
- LogisticRegressionTrainingSummary extends LogisticRegressionSummary
- BinaryLogisticRegressionSummary extends LogisticRegressionSummary
- BinaryLogisticRegressionTrainingSummary extends LogisticRegressionTrainingSummary with BinaryLogisticRegressionSummary

The public methods, such as `def summary`, return only the trait types listed above.

Four concrete classes then implement these traits:
- LogisticRegressionSummaryImpl (multiclass case)
- LogisticRegressionTrainingSummaryImpl (multiclass case)
- BinaryLogisticRegressionSummaryImpl (binary case)
- BinaryLogisticRegressionTrainingSummaryImpl (binary case)
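
For orientation, a minimal sketch of that hierarchy (the member shown at each level is illustrative of what that level adds, not the full API surface):

```
import org.apache.spark.sql.DataFrame

// Sketch only: one representative member per level of the hierarchy.
trait LogisticRegressionSummary {
  def predictions: DataFrame            // model output on the summarized dataset
}
trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary {
  def objectiveHistory: Array[Double]   // loss at each training iteration
}
trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary {
  def areaUnderROC: Double              // binary-only metric
}
trait BinaryLogisticRegressionTrainingSummary
  extends LogisticRegressionTrainingSummary with BinaryLogisticRegressionSummary
```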

## How was this patch tested?

Existing tests & added tests.

Author: WeichenXu 

Closes #15435 from WeichenXu123/mlor_summary.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c7270a46
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c7270a46
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c7270a46

Branch: refs/heads/master
Commit: c7270a46fc340db62c87ddfc6568603d0b832845
Parents: 73e64f7
Author: Weichen Xu 
Authored: Mon Aug 28 13:31:01 2017 -0700
Committer: Joseph K. Bradley 
Committed: Mon Aug 28 13:31:01 2017 -0700

--
 .../ml/classification/LogisticRegression.scala  | 340 +++
 .../LogisticRegressionSuite.scala   | 160 +++--
 .../ml/regression/LinearRegressionSuite.scala   |   2 +-
 project/MimaExcludes.scala  |  21 +-
 4 files changed, 412 insertions(+), 111 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c7270a46/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 21957d9..ffe4b52 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -22,7 +22,7 @@ import java.util.Locale
 import scala.collection.mutable
 
 import breeze.linalg.{DenseVector => BDV}
-import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => 
BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN}
+import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => 
BreezeLBFGSB, OWLQN => BreezeOWLQN}
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.SparkException
@@ -35,7 +35,7 @@ import org.apache.spark.ml.optim.loss.{L2Regularization, 
RDDLossFunction}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
 import org.apache.spark.ml.util._
-import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, 
MulticlassMetrics}
 import org.apache.spark.mllib.linalg.VectorImplicits._
 import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
 import org.apache.spark.mllib.util.MLUtils
@@ -882,21 +882,28 @@ class LogisticRegression @Since("1.2.0") (
 
 val model = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, 
interceptVector,
   numClasses, isMultinomial))
-// TODO: implement summary model for multinomial case
-val m = if (!isMultinomial) {
-  val (summaryModel, probabilityColName) = 
model.findSummaryModelAndProbabilityCol()
-  val logRegSummary = new BinaryLogisticRegressionTrainingSummary(
+
+val (summaryModel, probabilityColName, predictionColName) = 
model.findSummaryModel()
+val logRegSummary = if (numClasses <= 2) {
+  new BinaryLogisticRegressionTrainingSummaryImpl(
 summaryModel.transform(dataset),
 probabilityColName,
+predictionColName,
 $(labelCol),
 $(featuresCol),
 objectiveHistory)
-  model.setSummary(Some(logRegSummary))
 } else {
-  model
+  new LogisticRegressionTrainingSummaryImpl(
+summaryModel.transform(dataset),
+probabilityColName,
+predictionColName,
+$(labelCol),
+$(featuresCol),
+objectiveHistory)
 }
-instr.logSuccess(m)
-m
+model.setSummary(Some(logRegSummary))
+instr.logSuccess(model)
+model
   }
 
   @Since("1.4.0")
@@ -1010,8 +1017,8 @@ class LogisticRegressionModel private[spark] (
   private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None
 
   /**
-   * Gets summary of model on training set. An exception is
-   * thrown if `trainingSumm

spark git commit: [SPARK-19662][SCHEDULER][TEST] Add Fair Scheduler Unit Test coverage for different build cases

2017-08-28 Thread irashid
Repository: spark
Updated Branches:
  refs/heads/master 24e6c187f -> 73e64f7d5


[SPARK-19662][SCHEDULER][TEST] Add Fair Scheduler Unit Test coverage for 
different build cases

## What changes were proposed in this pull request?
The Fair Scheduler can be built via one of the following options:
- By setting the `spark.scheduler.allocation.file` property, or
- By placing a `fairscheduler.xml` file on the classpath.

These options are checked **in order**, and the fair scheduler is built from the 
first option found. If an invalid path is given, a `FileNotFoundException` is 
thrown.
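
For illustration, a minimal sketch of the first option (the file path and app name are invented; pointing the property at a missing file is exactly what triggers the `FileNotFoundException` above):

```
import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch: build the fair scheduler from an explicit allocation file.
val conf = new SparkConf()
  .setMaster("local")
  .setAppName("fair-scheduler-demo")        // illustrative app name
  .set("spark.scheduler.mode", "FAIR")
  .set("spark.scheduler.allocation.file",   // checked first, before the classpath
    "/path/to/fairscheduler.xml")           // illustrative path
// If the property were unset, Spark would instead look for a
// fairscheduler.xml on the classpath (the second option above).
val sc = new SparkContext(conf)
sc.setLocalProperty("spark.scheduler.pool", "pool1") // run subsequent jobs in a pool
sc.stop()
```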

This PR adds unit test coverage for these use cases, plus a minor documentation 
change for the second option (`fairscheduler.xml` on the classpath) to inform 
users.

Also, this PR is related to #16813 and was created separately to keep the patch 
content isolated and to make review easier.

## How was this patch tested?
Added new unit tests.

Author: erenavsarogullari 

Closes #16992 from erenavsarogullari/SPARK-19662.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73e64f7d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73e64f7d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73e64f7d

Branch: refs/heads/master
Commit: 73e64f7d50ba7a8469bd76f97e0a22fad41c2caa
Parents: 24e6c18
Author: erenavsarogullari 
Authored: Mon Aug 28 14:54:00 2017 -0500
Committer: Imran Rashid 
Committed: Mon Aug 28 14:54:00 2017 -0500

--
 .../resources/fairscheduler-with-valid-data.xml | 35 
 .../org/apache/spark/scheduler/PoolSuite.scala  | 44 
 docs/job-scheduling.md  |  2 +-
 3 files changed, 80 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/73e64f7d/core/src/test/resources/fairscheduler-with-valid-data.xml
--
diff --git a/core/src/test/resources/fairscheduler-with-valid-data.xml 
b/core/src/test/resources/fairscheduler-with-valid-data.xml
new file mode 100644
index 000..3d88233
--- /dev/null
+++ b/core/src/test/resources/fairscheduler-with-valid-data.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<!-- (Apache license header) -->
+<allocations>
+    <pool name="pool1">
+        <minShare>3</minShare>
+        <weight>1</weight>
+        <schedulingMode>FIFO</schedulingMode>
+    </pool>
+    <pool name="pool2">
+        <minShare>4</minShare>
+        <weight>2</weight>
+        <schedulingMode>FAIR</schedulingMode>
+    </pool>
+    <pool name="pool3">
+        <minShare>2</minShare>
+        <weight>3</weight>
+        <schedulingMode>FAIR</schedulingMode>
+    </pool>
+</allocations>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/73e64f7d/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
index 4901062..5bd3955 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.scheduler
 
+import java.io.FileNotFoundException
 import java.util.Properties
 
 import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, 
SparkFunSuite}
@@ -292,6 +293,49 @@ class PoolSuite extends SparkFunSuite with 
LocalSparkContext {
 }
   }
 
+  test("Fair Scheduler should build fair scheduler when " +
+"valid spark.scheduler.allocation.file property is set") {
+val xmlPath = 
getClass.getClassLoader.getResource("fairscheduler-with-valid-data.xml").getFile()
+val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, xmlPath)
+sc = new SparkContext(LOCAL, APP_NAME, conf)
+
+val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
+val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+schedulableBuilder.buildPools()
+
+verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO)
+verifyPool(rootPool, "pool1", 3, 1, FIFO)
+verifyPool(rootPool, "pool2", 4, 2, FAIR)
+verifyPool(rootPool, "pool3", 2, 3, FAIR)
+  }
+
+  test("Fair Scheduler should use default file(fairscheduler.xml) if it exists 
in classpath " +
+"and spark.scheduler.allocation.file property is not set") {
+val conf = new SparkConf()
+sc = new SparkContext(LOCAL, APP_NAME, conf)
+
+val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
+val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+schedulableBuilder.buildPools()
+
+verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO)
+verifyPool(rootPool, "1", 2, 1, FIFO)
+verifyPool(rootPool, "2", 3, 1, FIFO)
+verifyPool(rootPool, "3", 0, 1, FIFO)
+  }
+
+  test("Fair Scheduler should throw FileNotFoundException " +
+"when invalid spark.scheduler.allocation.file property is set") {
+val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, 
"INVALID_FILE_PATH")
+sc = new SparkContext(LO

spark git commit: [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server

2017-08-28 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 0d4ef2f69 -> 59bb7ebfb


[SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for 
launching daemons like History Server

The History Server launch uses SparkClassCommandBuilder to launch the server. 
SPARK_CLASSPATH has been deprecated and removed. spark-submit takes a different 
route: spark.driver.extraClassPath specifies the additional classpath jars that 
were previously set via SPARK_CLASSPATH. Right now the only way to specify 
additional jars when launching daemons such as the History Server is 
SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html), 
but that is intended as a distribution classpath. It would be nice to have a 
config similar to spark.driver.extraClassPath for launching daemons such as the 
History Server.

Added a new environment variable, SPARK_DAEMON_CLASSPATH, to set the classpath 
for launching daemons. Tested and verified for the History Server and standalone 
mode.
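
For illustration, a hypothetical `conf/spark-env.sh` entry using the new variable (the jar directory is an invented example path):

```
# Hypothetical example; /opt/daemon-extra-jars is an illustrative path.
# SPARK_DAEMON_CLASSPATH applies to daemons (History Server, standalone
# Master/Worker), not to applications submitted via spark-submit.
export SPARK_DAEMON_CLASSPATH="/opt/daemon-extra-jars/*"
```

With this in place, `./sbin/start-history-server.sh` would include those jars on the daemon's Java classpath.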

## How was this patch tested?
Initially, the History Server start script would fail because it could not find 
the required jars on the Java classpath. The same was true for running the 
Master and Worker in standalone mode. After adding the environment variable 
SPARK_DAEMON_CLASSPATH to the Java classpath, both sets of daemons (History 
Server and standalone daemons) start up and run.

Author: pgandhi 
Author: pgandhi999 

Closes #19047 from pgandhi999/master.

(cherry picked from commit 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36)
Signed-off-by: Tom Graves 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59bb7ebf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59bb7ebf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59bb7ebf

Branch: refs/heads/branch-2.2
Commit: 59bb7ebfb83c292cea853d6cd6fdf9748baa6ce2
Parents: 0d4ef2f
Author: pgandhi 
Authored: Mon Aug 28 08:51:22 2017 -0500
Committer: Tom Graves 
Committed: Mon Aug 28 08:51:49 2017 -0500

--
 conf/spark-env.sh.template  | 1 +
 docs/monitoring.md  | 4 
 docs/running-on-mesos.md| 2 ++
 docs/spark-standalone.md| 4 
 .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 +
 5 files changed, 16 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/conf/spark-env.sh.template
--
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index b7c985a..0f9150b 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -51,6 +51,7 @@
 # - SPARK_HISTORY_OPTS, to set config properties only for the history server 
(e.g. "-Dx=y")
 # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle 
service (e.g. "-Dx=y")
 # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. 
"-Dx=y")
+# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
 # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
 
 # Generic options for the daemons used in the standalone deploy mode

http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 3e577c5..d22cd94 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -62,6 +62,10 @@ The history server can be configured as follows:
 JVM options for the history server (default: none).
   
   
+SPARK_DAEMON_CLASSPATH
+Classpath for the history server (default: none).
+  
+  
 SPARK_PUBLIC_DNS
 
   The public address for the history server. If this is not set, links to 
application history

http://git-wip-us.apache.org/repos/asf/spark/blob/59bb7ebf/docs/running-on-mesos.md
--
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index 847a659..6b69bfc 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -158,6 +158,8 @@ If you like to run the `MesosClusterDispatcher` with 
Marathon, you need to run t
 The `MesosClusterDispatcher` also supports writing recovery state into 
Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover 
all submitted and running containers on relaunch.   In order to enable this 
recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring 
`spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations.
 For more information about these configur

spark git commit: [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server

2017-08-28 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 0456b4050 -> 24e6c187f


[SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for 
launching daemons like History Server

The History Server launch uses SparkClassCommandBuilder to launch the server. 
SPARK_CLASSPATH has been deprecated and removed. spark-submit takes a different 
route: spark.driver.extraClassPath specifies the additional classpath jars that 
were previously set via SPARK_CLASSPATH. Right now the only way to specify 
additional jars when launching daemons such as the History Server is 
SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html), 
but that is intended as a distribution classpath. It would be nice to have a 
config similar to spark.driver.extraClassPath for launching daemons such as the 
History Server.

Added a new environment variable, SPARK_DAEMON_CLASSPATH, to set the classpath 
for launching daemons. Tested and verified for the History Server and standalone 
mode.

## How was this patch tested?
Initially, the History Server start script would fail because it could not find 
the required jars on the Java classpath. The same was true for running the 
Master and Worker in standalone mode. After adding the environment variable 
SPARK_DAEMON_CLASSPATH to the Java classpath, both sets of daemons (History 
Server and standalone daemons) start up and run.

Author: pgandhi 
Author: pgandhi999 

Closes #19047 from pgandhi999/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24e6c187
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24e6c187
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24e6c187

Branch: refs/heads/master
Commit: 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36
Parents: 0456b40
Author: pgandhi 
Authored: Mon Aug 28 08:51:22 2017 -0500
Committer: Tom Graves 
Committed: Mon Aug 28 08:51:22 2017 -0500

--
 conf/spark-env.sh.template  | 1 +
 docs/monitoring.md  | 4 
 docs/running-on-mesos.md| 2 ++
 docs/spark-standalone.md| 4 
 .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 +
 5 files changed, 16 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/conf/spark-env.sh.template
--
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 1663019..f8c895f 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -52,6 +52,7 @@
 # - SPARK_HISTORY_OPTS, to set config properties only for the history server 
(e.g. "-Dx=y")
 # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle 
service (e.g. "-Dx=y")
 # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. 
"-Dx=y")
+# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
 # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
 
 # Generic options for the daemons used in the standalone deploy mode

http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 3e577c5..d22cd94 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -62,6 +62,10 @@ The history server can be configured as follows:
 JVM options for the history server (default: none).
   
   
+SPARK_DAEMON_CLASSPATH
+Classpath for the history server (default: none).
+  
+  
 SPARK_PUBLIC_DNS
 
   The public address for the history server. If this is not set, links to 
application history

http://git-wip-us.apache.org/repos/asf/spark/blob/24e6c187/docs/running-on-mesos.md
--
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index 0e5a20c..c12b858 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -160,6 +160,8 @@ If you like to run the `MesosClusterDispatcher` with 
Marathon, you need to run t
 The `MesosClusterDispatcher` also supports writing recovery state into 
Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover 
all submitted and running containers on relaunch.   In order to enable this 
recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring 
`spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations.
 For more information about these configurations please refer to the 
configurations [doc](configurations.html#deploy).
 
+You can also specify any

spark git commit: [SPARK-21818][ML][MLLIB] Fix bug where MultivariateOnlineSummarizer.variance generates a negative result

2017-08-28 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 2b4bd7910 -> 0d4ef2f69


[SPARK-21818][ML][MLLIB] Fix bug where MultivariateOnlineSummarizer.variance 
generates a negative result

Because of numerical error, MultivariateOnlineSummarizer.variance can generate a 
negative variance.

**This is a serious bug because many algorithms in MLlib use a stddev computed 
from `sqrt(variance)`; a negative variance makes it NaN and crashes the whole 
algorithm.**

We can reproduce this bug with the following code:
```
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer

val summarizer1 = (new MultivariateOnlineSummarizer)
  .add(Vectors.dense(3.0), 0.7)
val summarizer2 = (new MultivariateOnlineSummarizer)
  .add(Vectors.dense(3.0), 0.4)
val summarizer3 = (new MultivariateOnlineSummarizer)
  .add(Vectors.dense(3.0), 0.5)
val summarizer4 = (new MultivariateOnlineSummarizer)
  .add(Vectors.dense(3.0), 0.4)

val summarizer = summarizer1
  .merge(summarizer2)
  .merge(summarizer3)
  .merge(summarizer4)

println(summarizer.variance(0))
```
This PR fixes the bugs in `mllib.stat.MultivariateOnlineSummarizer.variance` and 
`ml.stat.SummarizerBuffer.variance`, and in several places in 
`WeightedLeastSquares`.
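
For intuition, a standalone sketch (plain Scala, not Spark code) of the failure mode and the clamp the patch applies; with a constant value the true variance is exactly zero, but the `bbSum / wSum - bBar * bBar` style formula can round below zero:

```
// Plain-Scala illustration; the weights mirror the reproduction above.
val weights = Seq(0.7, 0.4, 0.5, 0.4)
val x = 3.0
val wSum = weights.sum
val mean = weights.map(_ * x).sum / wSum
val raw  = weights.map(_ * x * x).sum / wSum - mean * mean
// raw can come out as a tiny negative value (on the order of -1e-16),
// and math.sqrt of a negative number is NaN. The fix clamps first:
val variance = math.max(raw, 0.0)
println(math.sqrt(variance)) // ~0.0, never NaN
```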

Test cases added.

Author: WeichenXu 

Closes #19029 from WeichenXu123/fix_summarizer_var_bug.

(cherry picked from commit 0456b4050817e64f27824720e695bbfff738d474)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0d4ef2f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0d4ef2f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0d4ef2f6

Branch: refs/heads/branch-2.2
Commit: 0d4ef2f690e378cade0a3ec84d535a535dc20dfc
Parents: 2b4bd79
Author: WeichenXu 
Authored: Mon Aug 28 07:41:42 2017 +0100
Committer: Sean Owen 
Committed: Mon Aug 28 08:00:29 2017 +0100

--
 .../spark/ml/optim/WeightedLeastSquares.scala | 12 +---
 .../mllib/stat/MultivariateOnlineSummarizer.scala |  5 +++--
 .../stat/MultivariateOnlineSummarizerSuite.scala  | 18 ++
 3 files changed, 30 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0d4ef2f6/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala 
b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
index 56ab967..c5c9c8e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala
@@ -440,7 +440,11 @@ private[ml] object WeightedLeastSquares {
 /**
  * Weighted population standard deviation of labels.
  */
-def bStd: Double = math.sqrt(bbSum / wSum - bBar * bBar)
+def bStd: Double = {
+  // We prevent variance from negative value caused by numerical error.
+  val variance = math.max(bbSum / wSum - bBar * bBar, 0.0)
+  math.sqrt(variance)
+}
 
 /**
  * Weighted mean of (label * features).
@@ -471,7 +475,8 @@ private[ml] object WeightedLeastSquares {
   while (i < triK) {
 val l = j - 2
 val aw = aSum(l) / wSum
-std(l) = math.sqrt(aaValues(i) / wSum - aw * aw)
+// We prevent variance from negative value caused by numerical error.
+std(l) = math.sqrt(math.max(aaValues(i) / wSum - aw * aw, 0.0))
 i += j
 j += 1
   }
@@ -489,7 +494,8 @@ private[ml] object WeightedLeastSquares {
   while (i < triK) {
 val l = j - 2
 val aw = aSum(l) / wSum
-variance(l) = aaValues(i) / wSum - aw * aw
+// We prevent variance from negative value caused by numerical error.
+variance(l) = math.max(aaValues(i) / wSum - aw * aw, 0.0)
 i += j
 j += 1
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/0d4ef2f6/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 7dc0c45..8121880 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -213,8 +213,9 @@ class MultivariateOnlineSummarizer extends 
MultivariateStatisticalSummary with S
   var i = 0
   val len = currM2n.length
   while (i < len) {
-realVariance(i) = (currM2n(i) + deltaMean(i) * deltaMean(i) * 
weightSum(i) *
-  (totalWeightSum - weightSum(i)) / totalWeightS