spark git commit: Revert "[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader"

2017-07-12 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 39eba3053 -> cf0719b5e


Revert "[SPARK-18646][REPL] Set parent classloader as null for 
ExecutorClassLoader"

This reverts commit 39eba3053ac99f03d9df56471bae5fc5cc9f4462.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf0719b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf0719b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf0719b5

Branch: refs/heads/branch-2.2
Commit: cf0719b5e99333b28bb4066b304dbcf8400c80ea
Parents: 39eba30
Author: Wenchen Fan 
Authored: Thu Jul 13 08:34:42 2017 +0800
Committer: Wenchen Fan 
Committed: Thu Jul 13 08:34:42 2017 +0800

--
 .../apache/spark/repl/ExecutorClassLoader.scala | 17 +++-
 .../spark/repl/ExecutorClassLoaderSuite.scala   | 46 
 2 files changed, 6 insertions(+), 57 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cf0719b5/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
--
diff --git 
a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala 
b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index 127f673..df13b32 100644
--- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -33,23 +33,18 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.{ParentClassLoader, Utils}
 
 /**
- * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used 
to load classes
- * defined by the interpreter when the REPL is used. Allows the user to 
specify if user class path
- * should be first. This class loader delegates getting/finding resources to 
parent loader, which
- * makes sense until REPL never provide resource dynamically.
- *
- * Note: [[ClassLoader]] will preferentially load class from parent. Only when 
parent is null or
- * the load failed, that it will call the overridden `findClass` function. To 
avoid the potential
- * issue caused by loading class using inappropriate class loader, we should 
set the parent of
- * ClassLoader to null, so that we can fully control which class loader is 
used. For detailed
- * discussion, see SPARK-18646.
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
+ * used to load classes defined by the interpreter when the REPL is used.
+ * Allows the user to specify if user class path should be first.
+ * This class loader delegates getting/finding resources to parent loader,
+ * which makes sense until REPL never provide resource dynamically.
  */
 class ExecutorClassLoader(
 conf: SparkConf,
 env: SparkEnv,
 classUri: String,
 parent: ClassLoader,
-userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
+userClassPathFirst: Boolean) extends ClassLoader with Logging {
   val uri = new URI(classUri)
   val directory = uri.getPath
 

http://git-wip-us.apache.org/repos/asf/spark/blob/cf0719b5/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
--
diff --git 
a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala 
b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
index 092d3c2..6d274bd 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
@@ -23,8 +23,6 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Paths, StandardOpenOption}
 import java.util
-import java.util.Collections
-import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
 
 import scala.io.Source
 import scala.language.implicitConversions
@@ -79,50 +77,6 @@ class ExecutorClassLoaderSuite
 }
   }
 
-  test("child over system classloader") {
-// JavaFileObject for scala.Option class
-val scalaOptionFile = new SimpleJavaFileObject(
-  URI.create(s"string:///scala/Option.java"),
-  JavaFileObject.Kind.SOURCE) {
-
-  override def getCharContent(ignoreEncodingErrors: Boolean): CharSequence 
= {
-"package scala; class Option {}"
-  }
-}
-// compile fake scala.Option class
-ToolProvider
-  .getSystemJavaCompiler
-  .getTask(null, null, null, null, null, 
Collections.singletonList(scalaOptionFile)).call()
-
-// create 'scala' dir in tempDir1
-val scalaDir = new File(tempDir1, "scala")
-assert(scalaDir.mkdir(), s"Failed to create 'scala' directory in 
$tempDir1")
-
-// move the generated class into scala dir
-val filename = "Option.class"
-val res

spark git commit: [SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

2017-07-12 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 780586a9f -> e08d06b37


[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

## What changes were proposed in this pull request?

`ClassLoader` preferentially loads classes from its `parent`. Only when `parent` is null, or the parent fails to load the class, does it call the overridden `findClass` function. To avoid the potential issues caused by loading classes with an inappropriate class loader, we should set the `parent` of this `ClassLoader` to null, so that we can fully control which class loader is used.
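
As a rough illustration of the mechanism described above (a minimal standalone sketch, not the actual `ExecutorClassLoader`): `java.lang.ClassLoader.loadClass` delegates to the parent first, and with a `null` parent only the bootstrap loader is consulted before the overridden `findClass`, so the subclass fully controls how non-bootstrap classes are resolved.

```scala
// Minimal sketch of a class loader with a null parent; the class name and the
// byte-lookup function are illustrative, not Spark's implementation.
class ReplClassLoader(classBytes: String => Option[Array[Byte]])
    extends ClassLoader(null) { // null parent: only bootstrap classes bypass findClass

  override def findClass(name: String): Class[_] =
    classBytes(name) match {
      case Some(bytes) => defineClass(name, bytes, 0, bytes.length)
      case None        => throw new ClassNotFoundException(name)
    }
}
```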

This is a take-over of #17074; the primary author of this PR is taroplus.

#17074 should be closed after this PR gets merged.

## How was this patch tested?

Add test case in `ExecutorClassLoaderSuite`.

Author: Kohki Nishio 
Author: Xingbo Jiang 

Closes #18614 from jiangxb1987/executor_classloader.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e08d06b3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e08d06b3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e08d06b3

Branch: refs/heads/master
Commit: e08d06b37bc96cc48fec1c5e40f73e0bca09c616
Parents: 780586a
Author: Kohki Nishio 
Authored: Thu Jul 13 08:22:40 2017 +0800
Committer: Wenchen Fan 
Committed: Thu Jul 13 08:22:40 2017 +0800

--
 .../apache/spark/repl/ExecutorClassLoader.scala | 17 +---
 .../spark/repl/ExecutorClassLoaderSuite.scala   | 46 
 2 files changed, 57 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
--
diff --git 
a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala 
b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index df13b32..127f673 100644
--- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -33,18 +33,23 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.{ParentClassLoader, Utils}
 
 /**
- * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
- * used to load classes defined by the interpreter when the REPL is used.
- * Allows the user to specify if user class path should be first.
- * This class loader delegates getting/finding resources to parent loader,
- * which makes sense until REPL never provide resource dynamically.
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used 
to load classes
+ * defined by the interpreter when the REPL is used. Allows the user to 
specify if user class path
+ * should be first. This class loader delegates getting/finding resources to 
parent loader, which
+ * makes sense until REPL never provide resource dynamically.
+ *
+ * Note: [[ClassLoader]] will preferentially load class from parent. Only when 
parent is null or
+ * the load failed, that it will call the overridden `findClass` function. To 
avoid the potential
+ * issue caused by loading class using inappropriate class loader, we should 
set the parent of
+ * ClassLoader to null, so that we can fully control which class loader is 
used. For detailed
+ * discussion, see SPARK-18646.
  */
 class ExecutorClassLoader(
 conf: SparkConf,
 env: SparkEnv,
 classUri: String,
 parent: ClassLoader,
-userClassPathFirst: Boolean) extends ClassLoader with Logging {
+userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
   val uri = new URI(classUri)
   val directory = uri.getPath
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e08d06b3/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
--
diff --git 
a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala 
b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
index 6d274bd..092d3c2 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
@@ -23,6 +23,8 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Paths, StandardOpenOption}
 import java.util
+import java.util.Collections
+import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
 
 import scala.io.Source
 import scala.language.implicitConversions
@@ -77,6 +79,50 @@ class ExecutorClassLoaderSuite
 }
   }
 
+  test("child over system classloader") {
+// JavaFileObject for scala.Option class
+val scalaOptionFile = new SimpleJavaFileObject(
+  URI.create(s"string:///scala/Option.java"),
+ 

spark git commit: [SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

2017-07-12 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 cb6fc89ba -> 39eba3053


[SPARK-18646][REPL] Set parent classloader as null for ExecutorClassLoader

## What changes were proposed in this pull request?

`ClassLoader` preferentially loads classes from its `parent`. Only when `parent` is null, or the parent fails to load the class, does it call the overridden `findClass` function. To avoid the potential issues caused by loading classes with an inappropriate class loader, we should set the `parent` of this `ClassLoader` to null, so that we can fully control which class loader is used.

This is a take-over of #17074; the primary author of this PR is taroplus.

#17074 should be closed after this PR gets merged.

## How was this patch tested?

Add test case in `ExecutorClassLoaderSuite`.

Author: Kohki Nishio 
Author: Xingbo Jiang 

Closes #18614 from jiangxb1987/executor_classloader.

(cherry picked from commit e08d06b37bc96cc48fec1c5e40f73e0bca09c616)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39eba305
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39eba305
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39eba305

Branch: refs/heads/branch-2.2
Commit: 39eba3053ac99f03d9df56471bae5fc5cc9f4462
Parents: cb6fc89
Author: Kohki Nishio 
Authored: Thu Jul 13 08:22:40 2017 +0800
Committer: Wenchen Fan 
Committed: Thu Jul 13 08:22:53 2017 +0800

--
 .../apache/spark/repl/ExecutorClassLoader.scala | 17 +---
 .../spark/repl/ExecutorClassLoaderSuite.scala   | 46 
 2 files changed, 57 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/39eba305/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
--
diff --git 
a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala 
b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
index df13b32..127f673 100644
--- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala
@@ -33,18 +33,23 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.{ParentClassLoader, Utils}
 
 /**
- * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI,
- * used to load classes defined by the interpreter when the REPL is used.
- * Allows the user to specify if user class path should be first.
- * This class loader delegates getting/finding resources to parent loader,
- * which makes sense until REPL never provide resource dynamically.
+ * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, used 
to load classes
+ * defined by the interpreter when the REPL is used. Allows the user to 
specify if user class path
+ * should be first. This class loader delegates getting/finding resources to 
parent loader, which
+ * makes sense until REPL never provide resource dynamically.
+ *
+ * Note: [[ClassLoader]] will preferentially load class from parent. Only when 
parent is null or
+ * the load failed, that it will call the overridden `findClass` function. To 
avoid the potential
+ * issue caused by loading class using inappropriate class loader, we should 
set the parent of
+ * ClassLoader to null, so that we can fully control which class loader is 
used. For detailed
+ * discussion, see SPARK-18646.
  */
 class ExecutorClassLoader(
 conf: SparkConf,
 env: SparkEnv,
 classUri: String,
 parent: ClassLoader,
-userClassPathFirst: Boolean) extends ClassLoader with Logging {
+userClassPathFirst: Boolean) extends ClassLoader(null) with Logging {
   val uri = new URI(classUri)
   val directory = uri.getPath
 

http://git-wip-us.apache.org/repos/asf/spark/blob/39eba305/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
--
diff --git 
a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala 
b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
index 6d274bd..092d3c2 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala
@@ -23,6 +23,8 @@ import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
 import java.nio.file.{Paths, StandardOpenOption}
 import java.util
+import java.util.Collections
+import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}
 
 import scala.io.Source
 import scala.language.implicitConversions
@@ -77,6 +79,50 @@ class ExecutorClassLoaderSuite
 }
   }
 
+  test("child over system classloader") {
+// JavaFileObject for scala.Option class

spark-website git commit: More 2.2.0 Release Notes

2017-07-12 Thread marmbrus
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 40f588bb5 -> 869f8a6fb


More 2.2.0 Release Notes


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/869f8a6f
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/869f8a6f
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/869f8a6f

Branch: refs/heads/asf-site
Commit: 869f8a6fb1548c773d2d75f63c55005417aeac35
Parents: 40f588b
Author: Michael Armbrust 
Authored: Wed Jul 12 22:54:55 2017 +
Committer: Michael Armbrust 
Committed: Wed Jul 12 22:54:55 2017 +

--
 releases/_posts/2017-07-11-spark-release-2-2-0.md | 4 +++-
 site/releases/spark-release-2-2-0.html| 7 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/869f8a6f/releases/_posts/2017-07-11-spark-release-2-2-0.md
--
diff --git a/releases/_posts/2017-07-11-spark-release-2-2-0.md 
b/releases/_posts/2017-07-11-spark-release-2-2-0.md
index 52ae28f..b630c75 100644
--- a/releases/_posts/2017-07-11-spark-release-2-2-0.md
+++ b/releases/_posts/2017-07-11-spark-release-2-2-0.md
@@ -128,6 +128,8 @@ The main focus of SparkR in the 2.2.0 release was adding 
extensive support for e
 
 ### Deprecations
 
+ - **Python**
+   - SPARK-12661: Drop support for Python 2.6
  - **MLlib**
- SPARK-18613: spark.ml LDA classes should not expose spark.mllib in APIs.  
In spark.ml.LDAModel, deprecated `oldLocalModel` and `getModel`.
  - **SparkR**
@@ -143,7 +145,7 @@ The main focus of SparkR in the 2.2.0 release was adding 
extensive support for e
 
 ### Known Issues
 
-- None
+- SPARK-21093: Multiple gapply execution occasionally failed in SparkR
 
 
 ### Credits

http://git-wip-us.apache.org/repos/asf/spark-website/blob/869f8a6f/site/releases/spark-release-2-2-0.html
--
diff --git a/site/releases/spark-release-2-2-0.html 
b/site/releases/spark-release-2-2-0.html
index 61504df..fc43088 100644
--- a/site/releases/spark-release-2-2-0.html
+++ b/site/releases/spark-release-2-2-0.html
@@ -371,6 +371,11 @@
 Deprecations
 
 
+  Python
+
+  SPARK-12661: Drop support for Python 2.6
+
+  
   MLlib
 
   SPARK-18613: spark.ml LDA classes should not expose spark.mllib in 
APIs.  In spark.ml.LDAModel, deprecated oldLocalModel and 
getModel.
@@ -401,7 +406,7 @@
 Known Issues
 
 
-  None
+  SPARK-21093: Multiple gapply execution occasionally failed in SparkR
 
 
 Credits


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark-website git commit: Fix 2.2.0 contributor list

2017-07-12 Thread marmbrus
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 2fac17731 -> 40f588bb5


Fix 2.2.0 contributor list


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/40f588bb
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/40f588bb
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/40f588bb

Branch: refs/heads/asf-site
Commit: 40f588bb525e21e457c5d839937350a5c18172c4
Parents: 2fac177
Author: Michael Armbrust 
Authored: Wed Jul 12 22:46:30 2017 +
Committer: Michael Armbrust 
Committed: Wed Jul 12 15:48:01 2017 -0700

--
 releases/_posts/2017-07-11-spark-release-2-2-0.md | 2 +-
 site/releases/spark-release-2-2-0.html| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/40f588bb/releases/_posts/2017-07-11-spark-release-2-2-0.md
--
diff --git a/releases/_posts/2017-07-11-spark-release-2-2-0.md 
b/releases/_posts/2017-07-11-spark-release-2-2-0.md
index 37d3638..52ae28f 100644
--- a/releases/_posts/2017-07-11-spark-release-2-2-0.md
+++ b/releases/_posts/2017-07-11-spark-release-2-2-0.md
@@ -148,4 +148,4 @@ The main focus of SparkR in the 2.2.0 release was adding 
extensive support for e
 
 ### Credits
 Last but not least, this release would not have been possible without the 
following contributors:
-ALeksander Eskilson, Aaditya Ramesh, Adam Roberts, Adrian Petrescu, Ahmed 
Mahran, Alex Bozarth, Alexander Shorin, Alexander Ulanov, Andrew Duffy, Andrew 
Mills, Andrew Ray, Angus Gerry, Anthony Truchet, Anton Okolnychyi, Artur 
Sukhenko, Bartek Wisniewski, Bijay Pathak, Bill Chambers, Bjarne Fruergaard, 
Brian Cho, Bryan Cutler, Burak Yavuz, Cen Yu Hai, Charles Allen, Cheng Lian, 
Chie Hayashida, Christian Kadner, Clark Fitzgerald, Cody Koeninger, Daniel 
Darabos, Daoyuan Wang, David Navas, Davies Liu, Denny Lee, Devaraj K, Dhruve 
Ashar, Dilip Biswal, Ding Ding, Dmitriy Sokolov, Dongjoon Hyun, Drew Robb, 
Ekasit Kijsipongse, Eren Avsarogullari, Ergin Seyfe, Eric Liang, Erik 
O'Shaughnessy, Eyal Farago, Felix Cheung, Ferdinand Xu, Fred Reiss, Fu Xing, 
Gabriel Huang, Gaetan Semet, Gang Wu, Gayathri Murali, Gu Huiqin Alice, 
Guoqiang Li, Gurvinder Singh, Hao Ren, Herman Van Hovell, Hiroshi Inoue, Holden 
Karau, Hossein Falaki, Huang Zhaowei, Huaxin Gao, Hyukjin Kwon, Imran Rashid, 
Jacek Laskows
 ki, Jagadeesan A S, Jakob Odersky, Jason White, Jeff Zhang, Jianfei Wang, 
Jiang Xingbo, Jie Huang, Jie Xiong, Jisoo Kim, John Muller, Jose Hiram Soltren, 
Joseph K. Bradley, Josh Rosen, Jun Kim, Junyang Qian, Justin Pihony, Kapil 
Singh, Kay Ousterhout, Kazuaki Ishizaki, Kevin Grealish, Kevin McHale, Kishor 
Patil, Koert Kuipers, Kousuke Saruta, Krishna Kalyan, Liang Ke, Liang-Chi 
Hsieh, Lianhui Wang, Linbo Jin, Liwei Lin, Luciano Resende, Maciej Brynski, 
Maciej Szymkiewicz, Mahmoud Rawas, Manoj Kumar, Marcelo Vanzin, Mariusz 
Strzelecki, Mark Grover, Maxime Rihouey, Miao Wang, Michael Allman, Michael 
Armbrust, Michael Gummelt, Michal Senkyr, Michal Wesolowski, Mikael Staldal, 
Mike Ihbe, Mitesh Patel, Nan Zhu, Nattavut Sutyanyong, Nic Eggert, Nicholas 
Chammas, Nick Lavers, Nick Pentreath, Nicolas Fraison, Noritaka Sekiyama, Peng 
Meng, Peng, Meng, Pete Robbins, Peter Ableda, Peter Lee, Philipp Hoffmann, 
Prashant Sharma, Prince J Wesley, Priyanka Garg, Qian Huang, Qifan Pu, Rajesh 
Balamoh
 an, Reynold Xin, Robert Kruszewski, Russell Spitzer, Ryan Blue, Saisai Shao, 
Sameer Agarwal, Sami Jaktholm, Sandeep Purohit, Sandeep Singh, Satendra Kumar, 
Sean Owen, Sean Zhong, Seth Hendrickson, Sharkd Tu, Shen Hong, Shivansh 
Srivastava, Shivaram Venkataraman, Shixiong Zhu, Shuai Lin, Shubham Chopra, 
Sital Kedia, Song Jun, Srinath Shankar, Stavros Kontopoulos, Stefan Schulze, 
Steve Loughran, Suman Somasundar, Sun Dapeng, Sun Rui, Sunitha Kambhampati, 
Suresh Thalamati, Susan X. Huynh, Sylvain Zimmer, Takeshi YAMAMURO, Takuya 
UESHIN, Tao LI, Tao Lin, Tao Wang, Tarun Kumar, Tathagata Das, Tejas Patil, 
Thomas Graves, Timothy Chen, Timothy Hunter, Tom Graves, Tom Magrino, Tommy YU, 
Tyson Condie, Uncle Gen, Vinayak Joshi, Vincent Xie, Wang Fei, Wang Lei, Wang 
Tao, Wayne Zhang, Weichen Xu, Weiluo (David) Ren, Weiqing Yang, Wenchen Fan, 
Wesley Tang, William Benton, Wojciech Szymanski, Xiangrui Meng, Xianyang Liu, 
Xiao Li, Xin Ren, Xin Wu, Xing SHI, Xusen Yin, Yadong Qi, Yanbo Liang, Yang 
 Wang, Yangyang Liu, Yin Huai, Yu Peng, Yucai Yu, Yuhao Yang, Yuming Wang, Yun 
Ni, Yves Raimond, Zhan Zhang, Zheng RuiFeng, Zhenhua Wang, pkch, tone-zhang, 
yimuxi
\ No newline at end of file
+ALeksander Eskilson, Aaditya Ramesh, Adam Budde, Adam Roberts, Adrian Ionescu, 
Ala Luszczak, Alex Bozarth, Andrew Ray, Anirudh Ramanathan, Anthony Truch

spark git commit: [SPARK-17701][SQL] Refactor RowDataSourceScanExec so its sameResult call does not compare strings

2017-07-12 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master d2d2a5de1 -> 780586a9f


[SPARK-17701][SQL] Refactor RowDataSourceScanExec so its sameResult call does 
not compare strings

## What changes were proposed in this pull request?

Currently, `RowDataSourceScanExec` and `FileSourceScanExec` rely on a 
"metadata" string map to implement equality comparison, since the RDDs they 
depend on cannot be directly compared. This has resulted in a number of 
correctness bugs around exchange reuse, e.g. SPARK-17673 and SPARK-16818.

To make these comparisons less brittle, we should refactor these classes to 
compare constructor parameters directly instead of relying on the metadata map.
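
To make the motivation concrete, here is a simplified, self-contained sketch (plain Scala, not Spark's actual plan classes) of the difference between comparing a rendered metadata map and comparing the constructor parameters structurally:

```scala
// Equality through a rendered metadata string is brittle: equivalent scans can render
// differently, so sameResult / exchange reuse misses them. Case-class equality over the
// real constructor parameters is structural. All names below are illustrative.
case class BrittleScan(metadata: Map[String, String])
case class StructuralScan(requiredColumns: Seq[String], filters: Set[String])

object SameResultSketch extends App {
  val a = BrittleScan(Map("PushedFilters" -> "[x > 1, y > 2]"))
  val b = BrittleScan(Map("PushedFilters" -> "[y > 2, x > 1]"))
  println(a == b) // false: the same scan, rendered in a different order

  val c = StructuralScan(Seq("x", "y"), Set("x > 1", "y > 2"))
  val d = StructuralScan(Seq("x", "y"), Set("y > 2", "x > 1"))
  println(c == d) // true: Set equality ignores ordering
}
```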

This PR refactors `RowDataSourceScanExec`; `FileSourceScanExec` will be fixed in a follow-up PR.

## How was this patch tested?

existing tests

Author: Wenchen Fan 

Closes #18600 from cloud-fan/minor.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/780586a9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/780586a9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/780586a9

Branch: refs/heads/master
Commit: 780586a9f2400c3fdfdb9a6b954001a3c9663941
Parents: d2d2a5d
Author: Wenchen Fan 
Authored: Wed Jul 12 09:23:54 2017 -0700
Committer: gatorsmile 
Committed: Wed Jul 12 09:23:54 2017 -0700

--
 .../sql/execution/DataSourceScanExec.scala  | 65 ++--
 .../apache/spark/sql/execution/SparkPlan.scala  |  5 --
 .../spark/sql/execution/SparkPlanInfo.scala |  4 +-
 .../datasources/DataSourceStrategy.scala| 57 +++--
 .../spark/sql/execution/ui/SparkPlanGraph.scala |  5 +-
 5 files changed, 56 insertions(+), 80 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/780586a9/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index a0def68..588c937 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -33,21 +33,23 @@ import 
org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat 
=> ParquetSource}
 import org.apache.spark.sql.execution.metric.SQLMetrics
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.sources.BaseRelation
+import org.apache.spark.sql.sources.{BaseRelation, Filter}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
 trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
   val relation: BaseRelation
-  val metastoreTableIdentifier: Option[TableIdentifier]
+  val tableIdentifier: Option[TableIdentifier]
 
   protected val nodeNamePrefix: String = ""
 
   override val nodeName: String = {
-s"Scan $relation 
${metastoreTableIdentifier.map(_.unquotedString).getOrElse("")}"
+s"Scan $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"
   }
 
+  // Metadata that describes more details of this scan.
+  protected def metadata: Map[String, String]
+
   override def simpleString: String = {
 val metadataEntries = metadata.toSeq.sorted.map {
   case (key, value) =>
@@ -73,34 +75,25 @@ trait DataSourceScanExec extends LeafExecNode with 
CodegenSupport {
 
 /** Physical plan node for scanning data from a relation. */
 case class RowDataSourceScanExec(
-output: Seq[Attribute],
+fullOutput: Seq[Attribute],
+requiredColumnsIndex: Seq[Int],
+filters: Set[Filter],
+handledFilters: Set[Filter],
 rdd: RDD[InternalRow],
 @transient relation: BaseRelation,
-override val outputPartitioning: Partitioning,
-override val metadata: Map[String, String],
-override val metastoreTableIdentifier: Option[TableIdentifier])
+override val tableIdentifier: Option[TableIdentifier])
   extends DataSourceScanExec {
 
+  def output: Seq[Attribute] = requiredColumnsIndex.map(fullOutput)
+
   override lazy val metrics =
 Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of 
output rows"))
 
-  val outputUnsafeRows = relation match {
-case r: HadoopFsRelation if r.fileFormat.isInstanceOf[ParquetSource] =>
-  !SparkSession.getActiveSession.get.sessionState.conf.getConf(
-SQLConf.PARQUET_VECTORIZED_READER_ENABLED)
-case _: HadoopFsRelation => true
-case _ => false
-  }
-
   protected override def doExecute(): RDD[InternalRow] = {
-val unsafeRow = if (outputU

spark git commit: [SPARK-18619][ML] Make QuantileDiscretizer/Bucketizer/StringIndexer/RFormula inherit from HasHandleInvalid

2017-07-12 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/master aaad34dc2 -> d2d2a5de1


[SPARK-18619][ML] Make QuantileDiscretizer/Bucketizer/StringIndexer/RFormula 
inherit from HasHandleInvalid

## What changes were proposed in this pull request?
1. Support overriding `HasHandleInvalid`
2. Make QuantileDiscretizer/Bucketizer/StringIndexer/RFormula inherit from HasHandleInvalid (a minimal sketch of the pattern follows below)
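
A minimal, self-contained sketch of the inheritance pattern (plain Scala; the trait and class below only mimic, and are not, the actual spark.ml API):

```scala
// The shared trait declares handleInvalid once; a transformer that needs extra options
// or different documentation overrides the inherited member instead of redeclaring it.
trait HasHandleInvalid {
  val handleInvalid: String = "error" // shared default; most algorithms allow skip/error
  final def getHandleInvalid: String = handleInvalid
}

class Bucketizer extends HasHandleInvalid {
  // Bucketizer additionally allows "keep" (route invalid values to a special bucket),
  // so it overrides the shared definition rather than defining a duplicate parameter.
  override val handleInvalid: String = "error" // allowed: skip, error, keep
}

object HandleInvalidSketch extends App {
  println(new Bucketizer().getHandleInvalid) // prints: error
}
```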

## How was this patch tested?
existing tests

[JIRA](https://issues.apache.org/jira/browse/SPARK-18619)

Author: Zheng RuiFeng 

Closes #18582 from zhengruifeng/heritate_HasHandleInvalid.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2d2a5de
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2d2a5de
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2d2a5de

Branch: refs/heads/master
Commit: d2d2a5de186ddf381d0bdb353b23d64ff0224e7f
Parents: aaad34d
Author: Zheng RuiFeng 
Authored: Wed Jul 12 22:09:03 2017 +0800
Committer: Yanbo Liang 
Committed: Wed Jul 12 22:09:03 2017 +0800

--
 .../apache/spark/ml/feature/Bucketizer.scala| 14 ++---
 .../spark/ml/feature/QuantileDiscretizer.scala  | 13 ++---
 .../org/apache/spark/ml/feature/RFormula.scala  | 13 ++---
 .../apache/spark/ml/feature/StringIndexer.scala | 13 ++---
 .../ml/param/shared/SharedParamsCodeGen.scala   |  2 +-
 .../spark/ml/param/shared/sharedParams.scala|  2 +-
 .../GeneralizedLinearRegression.scala   |  2 +-
 .../spark/ml/regression/LinearRegression.scala  | 14 ++---
 python/pyspark/ml/feature.py| 60 
 9 files changed, 53 insertions(+), 80 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d2d2a5de/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index 46b512f..6a11a75 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.Since
 import org.apache.spark.ml.Model
 import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.param._
-import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
+import org.apache.spark.ml.param.shared.{HasHandleInvalid, HasInputCol, 
HasOutputCol}
 import org.apache.spark.ml.util._
 import org.apache.spark.sql._
 import org.apache.spark.sql.expressions.UserDefinedFunction
@@ -36,7 +36,8 @@ import org.apache.spark.sql.types.{DoubleType, StructField, 
StructType}
  */
 @Since("1.4.0")
 final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: 
String)
-  extends Model[Bucketizer] with HasInputCol with HasOutputCol with 
DefaultParamsWritable {
+  extends Model[Bucketizer] with HasHandleInvalid with HasInputCol with 
HasOutputCol
+with DefaultParamsWritable {
 
   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("bucketizer"))
@@ -84,17 +85,12 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") 
override val uid: String
* Default: "error"
* @group param
*/
-  // TODO: SPARK-18619 Make Bucketizer inherit from HasHandleInvalid.
   @Since("2.1.0")
-  val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", 
"how to handle " +
-"invalid entries. Options are skip (filter out rows with invalid values), 
" +
+  override val handleInvalid: Param[String] = new Param[String](this, 
"handleInvalid",
+"how to handle invalid entries. Options are skip (filter out rows with 
invalid values), " +
 "error (throw an error), or keep (keep invalid values in a special 
additional bucket).",
 ParamValidators.inArray(Bucketizer.supportedHandleInvalids))
 
-  /** @group getParam */
-  @Since("2.1.0")
-  def getHandleInvalid: String = $(handleInvalid)
-
   /** @group setParam */
   @Since("2.1.0")
   def setHandleInvalid(value: String): this.type = set(handleInvalid, value)

http://git-wip-us.apache.org/repos/asf/spark/blob/d2d2a5de/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
index feceeba..95e8830 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
@@ -22,7 +22,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.ml._
 import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.pa

spark-website git commit: Patch references to docs/programming-guide.html to docs/rdd-programming-guide.html

2017-07-12 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 1c7fd01e9 -> 2fac17731


Patch references to docs/programming-guide.html to 
docs/rdd-programming-guide.html


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/2fac1773
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/2fac1773
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/2fac1773

Branch: refs/heads/asf-site
Commit: 2fac17731bdaafc3ce47be5d0adad682487f983c
Parents: 1c7fd01
Author: Sean Owen 
Authored: Wed Jul 12 12:20:26 2017 +0100
Committer: Sean Owen 
Committed: Wed Jul 12 12:20:26 2017 +0100

--
 examples.md   |  2 +-
 releases/_posts/2017-07-11-spark-release-2-2-0.md |  2 +-
 site/examples.html|  2 +-
 site/releases/spark-release-2-2-0.html|  2 +-
 site/sitemap.xml  | 14 +++---
 sitemap.xml   |  2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/2fac1773/examples.md
--
diff --git a/examples.md b/examples.md
index fe9cc79..1bc45d0 100644
--- a/examples.md
+++ b/examples.md
@@ -11,7 +11,7 @@ navigation:
 These examples give a quick overview of the Spark API.
 Spark is built on the concept of distributed datasets, which contain 
arbitrary Java or
 Python objects. You create a dataset from external data, then apply parallel 
operations
-to it. The building block of the Spark API is its [RDD 
API](https://spark.apache.org/docs/latest/programming-guide.html#resilient-distributed-datasets-rdds).
+to it. The building block of the Spark API is its [RDD 
API](https://spark.apache.org/docs/latest/rdd-programming-guide.html#resilient-distributed-datasets-rdds).
 In the RDD API,
 there are two types of operations: transformations, which define a 
new dataset based on previous ones,
 and actions, which kick off a job to execute on a cluster.

http://git-wip-us.apache.org/repos/asf/spark-website/blob/2fac1773/releases/_posts/2017-07-11-spark-release-2-2-0.md
--
diff --git a/releases/_posts/2017-07-11-spark-release-2-2-0.md 
b/releases/_posts/2017-07-11-spark-release-2-2-0.md
index 8027d8a..37d3638 100644
--- a/releases/_posts/2017-07-11-spark-release-2-2-0.md
+++ b/releases/_posts/2017-07-11-spark-release-2-2-0.md
@@ -59,7 +59,7 @@ To download Apache Spark 2.2.0, visit the Spark Programming 
Guide and Spark SQL, 
DataFrames and Datasets Guide.*
+*Programming guides: Spark RDD 
Programming Guide and Spark SQL, 
DataFrames and Datasets Guide.*
 
 
 ### Structured Streaming

http://git-wip-us.apache.org/repos/asf/spark-website/blob/2fac1773/site/examples.html
--
diff --git a/site/examples.html b/site/examples.html
index 439a62b..a4cfeda 100644
--- a/site/examples.html
+++ b/site/examples.html
@@ -199,7 +199,7 @@
 These examples give a quick overview of the Spark API.
 Spark is built on the concept of distributed datasets, which contain 
arbitrary Java or
 Python objects. You create a dataset from external data, then apply parallel 
operations
-to it. The building block of the Spark API is its https://spark.apache.org/docs/latest/programming-guide.html#resilient-distributed-datasets-rdds";>RDD
 API.
+to it. The building block of the Spark API is its https://spark.apache.org/docs/latest/rdd-programming-guide.html#resilient-distributed-datasets-rdds";>RDD
 API.
 In the RDD API,
 there are two types of operations: transformations, which define a 
new dataset based on previous ones,
 and actions, which kick off a job to execute on a cluster.

http://git-wip-us.apache.org/repos/asf/spark-website/blob/2fac1773/site/releases/spark-release-2-2-0.html
--
diff --git a/site/releases/spark-release-2-2-0.html 
b/site/releases/spark-release-2-2-0.html
index badc714..0460c7d 100644
--- a/site/releases/spark-release-2-2-0.html
+++ b/site/releases/spark-release-2-2-0.html
@@ -264,7 +264,7 @@
   
 
 
-Programming guides: Spark 
Programming Guide and Spark SQL, DataFrames and 
Datasets Guide.
+Programming guides: Spark RDD Programming Guide 
and Spark SQL, DataFrames and 
Datasets Guide.
 
 Structured Streaming
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/2fac1773/site/sitemap.xml
--
diff --git a/site/sitemap.xml b/site/sitemap.xml
index 591e871..0ce546f 100644
--- a/site/sitemap.xml
+++ b/site/sitemap.xml
@@ -22,7 +22,7 @@
   1.0
 
 
-  https://spark.ap

spark git commit: [SPARK-21007][SQL] Add SQL function - RIGHT && LEFT

2017-07-12 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 5ed134ee2 -> aaad34dc2


[SPARK-21007][SQL] Add SQL function - RIGHT && LEFT

## What changes were proposed in this pull request?
Add SQL functions RIGHT and LEFT, with the same semantics as MySQL:
https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_left
https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_right
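
A short usage sketch (e.g. pasted into spark-shell, assuming a local `SparkSession`); the expected results follow the Examples in the `ExpressionDescription`s added below:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("left-right-sketch").getOrCreate()

// left('Spark SQL', 3) -> "Spa", right('Spark SQL', 3) -> "SQL";
// a len <= 0 yields an empty string.
spark.sql("SELECT left('Spark SQL', 3) AS l, right('Spark SQL', 3) AS r").show()
```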

## How was this patch tested?
unit test

Author: liuxian 

Closes #18228 from 10110346/lx-wip-0607.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aaad34dc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aaad34dc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aaad34dc

Branch: refs/heads/master
Commit: aaad34dc2f537f7eef50fc5f72a7f178800e8d38
Parents: 5ed134e
Author: liuxian 
Authored: Wed Jul 12 18:51:19 2017 +0800
Committer: Wenchen Fan 
Committed: Wed Jul 12 18:51:19 2017 +0800

--
 .../catalyst/analysis/FunctionRegistry.scala|  2 +
 .../expressions/stringExpressions.scala | 43 
 .../sql-tests/inputs/string-functions.sql   |  6 +++
 .../sql-tests/results/string-functions.sql.out  | 34 +++-
 4 files changed, 84 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/aaad34dc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index f4b3e86..10b22ae 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -346,6 +346,8 @@ object FunctionRegistry {
 expression[StringSplit]("split"),
 expression[Substring]("substr"),
 expression[Substring]("substring"),
+expression[Left]("left"),
+expression[Right]("right"),
 expression[SubstringIndex]("substring_index"),
 expression[StringTranslate]("translate"),
 expression[StringTrim]("trim"),

http://git-wip-us.apache.org/repos/asf/spark/blob/aaad34dc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 83fdcfc..d75b9d6 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1199,6 +1199,49 @@ case class Substring(str: Expression, pos: Expression, 
len: Expression)
 }
 
 /**
+ * Returns the rightmost n characters from the string.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str, len) - Returns the rightmost `len`(`len` can be string 
type) characters from the string `str`,if `len` is less or equal than 0 the 
result is an empty string.",
+  extended = """
+Examples:
+  > SELECT _FUNC_('Spark SQL', 3);
+   SQL
+  """)
+// scalastyle:on line.size.limit
+case class Right(str: Expression, len: Expression, child: Expression) extends 
RuntimeReplaceable {
+  def this(str: Expression, len: Expression) = {
+this(str, len, If(IsNull(str), Literal(null, StringType), 
If(LessThanOrEqual(len, Literal(0)),
+  Literal(UTF8String.EMPTY_UTF8, StringType), new Substring(str, 
UnaryMinus(len)
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(str, len)
+  override def sql: String = s"$prettyName(${str.sql}, ${len.sql})"
+}
+
+/**
+ * Returns the leftmost n characters from the string.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(str, len) - Returns the leftmost `len`(`len` can be string 
type) characters from the string `str`,if `len` is less or equal than 0 the 
result is an empty string.",
+  extended = """
+Examples:
+  > SELECT _FUNC_('Spark SQL', 3);
+   Spa
+  """)
+// scalastyle:on line.size.limit
+case class Left(str: Expression, len: Expression, child: Expression) extends 
RuntimeReplaceable {
+  def this(str: Expression, len: Expression) = {
+this(str, len, Substring(str, Literal(1), len))
+  }
+
+  override def flatArguments: Iterator[Any] = Iterator(str, len)
+  override def sql: String = s"$prettyName(${str.sql}, ${len.sql})"
+}
+
+/**
  * A function that returns the char length of the given string expression or
  * num

spark-website git commit: Add note about CVE-2017-7678

2017-07-12 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site f2d5d2a68 -> 1c7fd01e9


Add note about CVE-2017-7678


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/1c7fd01e
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/1c7fd01e
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/1c7fd01e

Branch: refs/heads/asf-site
Commit: 1c7fd01e9eb258407da07dc765444e4cf8c28f7c
Parents: f2d5d2a
Author: Sean Owen 
Authored: Wed Jul 12 11:24:41 2017 +0100
Committer: Sean Owen 
Committed: Wed Jul 12 11:24:41 2017 +0100

--
 security.md| 43 ++-
 site/security.html | 42 +-
 2 files changed, 83 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/1c7fd01e/security.md
--
diff --git a/security.md b/security.md
index 505c225..a26f1d1 100644
--- a/security.md
+++ b/security.md
@@ -17,4 +17,45 @@ non-public list that will reach the Spark PMC. Messages to 
`secur...@apache.org`
 
 Known Security Issues
 
-None yet.
\ No newline at end of file
+CVE-2017-7678 Apache Spark XSS web UI MHTML 
vulnerability
+
+Severity: Low
+
+Vendor: The Apache Software Foundation
+
+Versions Affected:
+Versions of Apache Spark before 2.2.0
+
+Description:
+It is possible for an attacker to take advantage of a user's trust in the 
server to trick them into visiting a link that points to a shared Spark cluster 
and submits data including MHTML to the Spark master, or history server. This 
data, which could contain a script, would then be reflected back to the user 
and could be evaluated and executed by MS Windows-based clients. It is not an 
attack on Spark itself, but on the user, who may then execute the script 
inadvertently when viewing elements of the Spark web UIs.
+
+Mitigation:
+Update to Apache Spark 2.2.0 or later.
+
+Example:
+Request:
+```
+GET /app/?appId=Content-Type:%20multipart/related;%20boundary=_AppScan%0d%0a--
+_AppScan%0d%0aContent-Location:foo%0d%0aContent-Transfer-
+Encoding:base64%0d%0a%0d%0aPGh0bWw%2bPHNjcmlwdD5hbGVydCgiWFNTIik8L3NjcmlwdD48L2h0bWw%2b%0d%0a
+HTTP/1.1
+```
+
+Excerpt from response:
+```
+No running application with ID Content-Type: 
multipart/related;
+boundary=_AppScan
+--_AppScan
+Content-Location:foo
+Content-Transfer-Encoding:base64
+PGh0bWw+PHNjcmlwdD5hbGVydCgiWFNTIik8L3NjcmlwdD48L2h0bWw+
+
+```
+Result: In the above payload the BASE64 data decodes as:
+```
+alert("XSS")
+```
+
+Credit:
+- Mike Kasper, Nicholas Marion
+- IBM z Systems Center for Secure Engineering
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark-website/blob/1c7fd01e/site/security.html
--
diff --git a/site/security.html b/site/security.html
index 83345d3..3ef0942 100644
--- a/site/security.html
+++ b/site/security.html
@@ -204,7 +204,47 @@ non-public list that will reach the Spark PMC. Messages to 
security@apache
 
 Known Security Issues
 
-None yet.
+CVE-2017-7678 Apache Spark XSS web UI MHTML 
vulnerability
+
+Severity: Low
+
+Vendor: The Apache Software Foundation
+
+Versions Affected:
+Versions of Apache Spark before 2.2.0
+
+Description:
+It is possible for an attacker to take advantage of a user’s trust in 
the server to trick them into visiting a link that points to a shared Spark 
cluster and submits data including MHTML to the Spark master, or history 
server. This data, which could contain a script, would then be reflected back 
to the user and could be evaluated and executed by MS Windows-based clients. It 
is not an attack on Spark itself, but on the user, who may then execute the 
script inadvertently when viewing elements of the Spark web UIs.
+
+Mitigation:
+Update to Apache Spark 2.2.0 or later.
+
+Example:
+Request:
+GET 
/app/?appId=Content-Type:%20multipart/related;%20boundary=_AppScan%0d%0a--
+_AppScan%0d%0aContent-Location:foo%0d%0aContent-Transfer-
+Encoding:base64%0d%0a%0d%0aPGh0bWw%2bPHNjcmlwdD5hbGVydCgiWFNTIik8L3NjcmlwdD48L2h0bWw%2b%0d%0a
+HTTP/1.1
+
+
+Excerpt from response:
+
+No running application with ID Content-Type: multipart/related;
+boundary=_AppScan
+--_AppScan
+Content-Location:foo
+Content-Transfer-Encoding:base64
+PGh0bWw+PHNjcmlwdD5hbGVydCgiWFNTIik8L3NjcmlwdD48L2h0bWw+
+
+
+Result: In the above payload the BASE64 data decodes as:
+
+
+Credit:
+
+ Mike Kasper, Nicholas Marion
+ IBM z Systems Center for Secure Engineering
+

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail:

spark git commit: [SPARK-21305][ML][MLLIB] Add options to disable multi-threading of native BLAS

2017-07-12 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f587d2e3f -> 5ed134ee2


[SPARK-21305][ML][MLLIB] Add options to disable multi-threading of native BLAS

## What changes were proposed in this pull request?

Many ML/MLlib algorithms use native BLAS (like Intel MKL, ATLAS, OpenBLAS) to improve
performance. Many popular native BLAS implementations, such as Intel MKL and OpenBLAS, use
multi-threading, which can conflict with Spark's execution model. Spark should provide options
to disable multi-threading in native BLAS.

https://github.com/xianyi/OpenBLAS/wiki/faq#multi-threaded
https://software.intel.com/en-us/articles/recommended-settings-for-calling-intel-mkl-routines-from-multi-threaded-applications

## How was this patch tested?
The existing UT.

Author: Peng Meng 

Closes #18551 from mpjlu/optimzeBLAS.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ed134ee
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ed134ee
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ed134ee

Branch: refs/heads/master
Commit: 5ed134ee213060882c6e3ed713473fa6cc158d36
Parents: f587d2e
Author: Peng Meng 
Authored: Wed Jul 12 11:02:04 2017 +0100
Committer: Sean Owen 
Committed: Wed Jul 12 11:02:04 2017 +0100

--
 conf/spark-env.sh.template | 4 
 docs/ml-guide.md   | 6 ++
 2 files changed, 10 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5ed134ee/conf/spark-env.sh.template
--
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index b9aab5a..1663019 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -61,3 +61,7 @@
 # - SPARK_IDENT_STRING  A string representing this instance of spark. 
(Default: $USER)
 # - SPARK_NICENESS  The scheduling priority for daemons. (Default: 0)
 # - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will 
not output a PID file.
+# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
+# You might get better performance to enable these options if using native 
BLAS (see SPARK-21305).
+# - MKL_NUM_THREADS=1Disable multi-threading of Intel MKL
+# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS

http://git-wip-us.apache.org/repos/asf/spark/blob/5ed134ee/docs/ml-guide.md
--
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index fb46213..adb1c9a 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -61,6 +61,12 @@ To configure `netlib-java` / Breeze to use system optimised 
binaries, include
 project and read the [netlib-java](https://github.com/fommil/netlib-java) 
documentation for your
 platform's additional installation instructions.
 
+The most popular native BLAS such as [Intel 
MKL](https://software.intel.com/en-us/mkl), 
[OpenBLAS](http://www.openblas.net), can use multiple threads in a single 
operation, which can conflict with Spark's execution model.
+
+Configuring these BLAS implementations to use a single thread for operations 
may actually improve performance (see 
[SPARK-21305](https://issues.apache.org/jira/browse/SPARK-21305)). It is 
usually optimal to match this to the number of cores each Spark task is 
configured to use, which is 1 by default and typically left at 1.
+
+Please refer to resources like the following to understand how to configure 
the number of threads these BLAS implementations use: [Intel 
MKL](https://software.intel.com/en-us/articles/recommended-settings-for-calling-intel-mkl-routines-from-multi-threaded-applications)
 and [OpenBLAS](https://github.com/xianyi/OpenBLAS/wiki/faq#multi-threaded).
+
 To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 
1.4 or newer.
 
 [^1]: To learn more about the benefits and background of system optimised 
natives, you may wish to


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-20842][SQL] Upgrade to 1.2.2 for Hive Metastore Client 1.2

2017-07-12 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master e0af76a36 -> f587d2e3f


[SPARK-20842][SQL] Upgrade to 1.2.2 for Hive Metastore Client 1.2

### What changes were proposed in this pull request?
The Hive 1.2.2 release is available. Below is the list of bugs fixed in 1.2.2:

https://issues.apache.org/jira/secure/ReleaseNote.jspa?version=12332952&styleName=Text&projectId=12310843
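
A hedged usage sketch: after this change, the version string "1.2.2" maps to the existing `hive.v1_2` client. This assumes the standard `spark.sql.hive.metastore.version` / `spark.sql.hive.metastore.jars` configuration keys:

```scala
import org.apache.spark.sql.SparkSession

// e.g. in an application that talks to a Hive 1.2.2 metastore
val spark = SparkSession.builder()
  .appName("hive-1.2.2-metastore")
  .config("spark.sql.hive.metastore.version", "1.2.2") // accepted after this patch
  .config("spark.sql.hive.metastore.jars", "maven")    // download matching client jars
  .enableHiveSupport()
  .getOrCreate()
```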

### How was this patch tested?
N/A

Author: Xiao Li 

Closes #18063 from gatorsmile/upgradeHiveClientTo1.2.2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f587d2e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f587d2e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f587d2e3

Branch: refs/heads/master
Commit: f587d2e3fa133051a64e4ec1aa788b554b552690
Parents: e0af76a
Author: Xiao Li 
Authored: Wed Jul 12 15:48:44 2017 +0800
Committer: Wenchen Fan 
Committed: Wed Jul 12 15:48:44 2017 +0800

--
 .../org/apache/spark/sql/hive/client/IsolatedClientLoader.scala| 2 +-
 .../src/main/scala/org/apache/spark/sql/hive/client/package.scala  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f587d2e3/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index b8aa067..930f0dd 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -92,7 +92,7 @@ private[hive] object IsolatedClientLoader extends Logging {
 case "14" | "0.14" | "0.14.0" => hive.v14
 case "1.0" | "1.0.0" => hive.v1_0
 case "1.1" | "1.1.0" => hive.v1_1
-case "1.2" | "1.2.0" | "1.2.1" => hive.v1_2
+case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2
 case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
 case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f587d2e3/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index f9635e3..c14154a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -56,7 +56,7 @@ package object client {
 "net.hydromatic:linq4j",
 "net.hydromatic:quidem"))
 
-case object v1_2 extends HiveVersion("1.2.1",
+case object v1_2 extends HiveVersion("1.2.2",
   exclusions = Seq("eigenbase:eigenbase-properties",
 "org.apache.curator:*",
 "org.pentaho:pentaho-aggdesigner-algorithm",


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-21370][SS] Add test for state reliability when one read-only state store aborts after read-write state store commits

2017-07-12 Thread tdas
Repository: spark
Updated Branches:
  refs/heads/master e16e8c7ad -> e0af76a36


[SPARK-21370][SS] Add test for state reliability when one read-only state store 
aborts after read-write state store commits

## What changes were proposed in this pull request?

During Streaming Aggregation, we have two StateStores per task, one used as 
read-only in
`StateStoreRestoreExec`, and one read-write used in `StateStoreSaveExec`. 
`StateStore.abort`
will be called for these StateStores if they haven't committed their results. 
We need to
make sure that `abort` in read-only store after a `commit` in the read-write 
store doesn't
accidentally lead to the deletion of state.

This PR adds a test for this condition.

## How was this patch tested?

This PR adds a test.

Author: Burak Yavuz 

Closes #18603 from brkyvz/ss-test.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e0af76a3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e0af76a3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e0af76a3

Branch: refs/heads/master
Commit: e0af76a36a67d409776bd379c6d6ef6d60356c06
Parents: e16e8c7
Author: Burak Yavuz 
Authored: Wed Jul 12 00:39:09 2017 -0700
Committer: Tathagata Das 
Committed: Wed Jul 12 00:39:09 2017 -0700

--
 .../streaming/state/StateStoreSuite.scala   | 31 
 1 file changed, 31 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e0af76a3/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
index c2087ec..7cb86dc 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala
@@ -665,6 +665,37 @@ abstract class StateStoreSuiteBase[ProviderClass <: 
StateStoreProvider]
 checkInvalidVersion(3)
   }
 
+  test("two concurrent StateStores - one for read-only and one for 
read-write") {
+// During Streaming Aggregation, we have two StateStores per task, one 
used as read-only in
+// `StateStoreRestoreExec`, and one read-write used in 
`StateStoreSaveExec`. `StateStore.abort`
+// will be called for these StateStores if they haven't committed their 
results. We need to
+// make sure that `abort` in read-only store after a `commit` in the 
read-write store doesn't
+// accidentally lead to the deletion of state.
+val dir = newDir()
+val storeId = StateStoreId(dir, 0L, 1)
+val provider0 = newStoreProvider(storeId)
+// prime state
+val store = provider0.getStore(0)
+val key = "a"
+put(store, key, 1)
+store.commit()
+assert(rowsToSet(store.iterator()) === Set(key -> 1))
+
+// two state stores
+val provider1 = newStoreProvider(storeId)
+val restoreStore = provider1.getStore(1)
+val saveStore = provider1.getStore(1)
+
+put(saveStore, key, get(restoreStore, key).get + 1)
+saveStore.commit()
+restoreStore.abort()
+
+// check that state is correct for next batch
+val provider2 = newStoreProvider(storeId)
+val finalStore = provider2.getStore(2)
+assert(rowsToSet(finalStore.iterator()) === Set(key -> 2))
+  }
+
   /** Return a new provider with a random id */
   def newStoreProvider(): ProviderClass
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-21146][CORE] Master/Worker should handle and shutdown when any thread gets UncaughtException

2017-07-12 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master 24367f23f -> e16e8c7ad


[SPARK-21146][CORE] Master/Worker should handle and shutdown when any thread 
gets UncaughtException

## What changes were proposed in this pull request?

Adding the default UncaughtExceptionHandler to the Worker.
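
For reference, a minimal sketch of what installing a process-wide default handler does (plain JVM API; `SparkUncaughtExceptionHandler` itself additionally logs the error and, depending on the `exitOnUncaughtException` flag, can exit the JVM):

```scala
object UncaughtHandlerSketch extends App {
  // Any thread that dies with an unhandled exception is routed to this handler.
  Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler {
    override def uncaughtException(t: Thread, e: Throwable): Unit =
      System.err.println(s"Uncaught exception in ${t.getName}: $e")
  })

  val worker = new Thread(() => throw new RuntimeException("boom"), "worker-thread")
  worker.start()
  worker.join() // prints: Uncaught exception in worker-thread: java.lang.RuntimeException: boom
}
```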

## How was this patch tested?

I verified it manually: when any worker thread hits an uncaught exception, the default
UncaughtExceptionHandler now handles it.

Author: Devaraj K 

Closes #18357 from devaraj-kavali/SPARK-21146.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e16e8c7a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e16e8c7a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e16e8c7a

Branch: refs/heads/master
Commit: e16e8c7ad31762aaca5e2bc874de1540af9cc4b7
Parents: 24367f2
Author: Devaraj K 
Authored: Wed Jul 12 00:14:58 2017 -0700
Committer: Shixiong Zhu 
Committed: Wed Jul 12 00:14:58 2017 -0700

--
 .../scala/org/apache/spark/deploy/master/Master.scala|  4 +++-
 .../scala/org/apache/spark/deploy/worker/Worker.scala|  4 +++-
 .../main/scala/org/apache/spark/executor/Executor.scala  |  2 +-
 .../spark/util/SparkUncaughtExceptionHandler.scala   | 11 ++-
 core/src/main/scala/org/apache/spark/util/Utils.scala|  4 +++-
 .../spark/deploy/mesos/MesosClusterDispatcher.scala  |  2 +-
 6 files changed, 17 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e16e8c7a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 0dee25f..4cc580e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -36,7 +36,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.metrics.MetricsSystem
 import org.apache.spark.rpc._
 import org.apache.spark.serializer.{JavaSerializer, Serializer}
-import org.apache.spark.util.{ThreadUtils, Utils}
+import org.apache.spark.util.{SparkUncaughtExceptionHandler, ThreadUtils, 
Utils}
 
 private[deploy] class Master(
 override val rpcEnv: RpcEnv,
@@ -1045,6 +1045,8 @@ private[deploy] object Master extends Logging {
   val ENDPOINT_NAME = "Master"
 
   def main(argStrings: Array[String]) {
+Thread.setDefaultUncaughtExceptionHandler(new 
SparkUncaughtExceptionHandler(
+  exitOnUncaughtException = false))
 Utils.initDaemon(log)
 val conf = new SparkConf
 val args = new MasterArguments(argStrings, conf)

http://git-wip-us.apache.org/repos/asf/spark/blob/e16e8c7a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala 
b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
index bed4745..f6d3876 100755
--- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala
@@ -38,7 +38,7 @@ import org.apache.spark.deploy.worker.ui.WorkerWebUI
 import org.apache.spark.internal.Logging
 import org.apache.spark.metrics.MetricsSystem
 import org.apache.spark.rpc._
-import org.apache.spark.util.{ThreadUtils, Utils}
+import org.apache.spark.util.{SparkUncaughtExceptionHandler, ThreadUtils, 
Utils}
 
 private[deploy] class Worker(
 override val rpcEnv: RpcEnv,
@@ -737,6 +737,8 @@ private[deploy] object Worker extends Logging {
   val ENDPOINT_NAME = "Worker"
 
   def main(argStrings: Array[String]) {
+Thread.setDefaultUncaughtExceptionHandler(new 
SparkUncaughtExceptionHandler(
+  exitOnUncaughtException = false))
 Utils.initDaemon(log)
 val conf = new SparkConf
 val args = new WorkerArguments(argStrings, conf)

http://git-wip-us.apache.org/repos/asf/spark/blob/e16e8c7a/core/src/main/scala/org/apache/spark/executor/Executor.scala
--
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala 
b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 19e7eb0..21f0db1 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -56,7 +56,7 @@ private[spark] class Executor(
 env: SparkEnv,
 userClassPath: Seq[URL] = Nil,
 isLocal: Boolean = false,
-uncaughtExceptionHandler: UncaughtExceptionHandler = 
SparkUncaughtExceptionHandler)
+uncaughtExceptionHandler: UncaughtExceptionHandler = new 
SparkUncaug