Repository: spark
Updated Branches:
  refs/heads/branch-2.0 3727e2859 -> beaf703d5


[SPARK-15061][PYSPARK] Upgrade to Py4J 0.10.1

## What changes were proposed in this pull request?

This upgrades to Py4J 0.10.1, which reduces syscall overhead in the Java gateway
(see https://github.com/bartdag/py4j/issues/201 ). Related issue:
https://issues.apache.org/jira/browse/SPARK-6728 .

## How was this patch tested?

Existing doctests & unit tests pass

Author: Holden Karau <[email protected]>

Closes #13064 from holdenk/SPARK-15061-upgrade-to-py4j-0.10.1.

(cherry picked from commit 382dbc12bb4b06871850a94f88018e76058a9b52)
Signed-off-by: Sean Owen <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/beaf703d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/beaf703d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/beaf703d

Branch: refs/heads/branch-2.0
Commit: beaf703d5472a90f529d5194f5cc657b45871087
Parents: 3727e28
Author: Holden Karau <[email protected]>
Authored: Fri May 13 08:59:18 2016 +0100
Committer: Sean Owen <[email protected]>
Committed: Fri May 13 08:59:28 2016 +0100

----------------------------------------------------------------------
 LICENSE                                            |   2 +-
 bin/pyspark                                        |   2 +-
 bin/pyspark2.cmd                                   |   2 +-
 core/pom.xml                                       |   2 +-
 .../org/apache/spark/api/python/PythonUtils.scala  |   2 +-
 dev/deps/spark-deps-hadoop-2.2                     |   2 +-
 dev/deps/spark-deps-hadoop-2.3                     |   2 +-
 dev/deps/spark-deps-hadoop-2.4                     |   2 +-
 dev/deps/spark-deps-hadoop-2.6                     |   2 +-
 dev/deps/spark-deps-hadoop-2.7                     |   2 +-
 python/docs/Makefile                               |   2 +-
 python/lib/py4j-0.10.1-src.zip                     | Bin 0 -> 61356 bytes
 python/lib/py4j-0.9.2-src.zip                      | Bin 55521 -> 0 bytes
 sbin/spark-config.sh                               |   2 +-
 .../org/apache/spark/deploy/yarn/Client.scala      |   4 ++--
 .../spark/deploy/yarn/YarnClusterSuite.scala       |   2 +-
 16 files changed, 15 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 9714b3b..f403640 100644
--- a/LICENSE
+++ b/LICENSE
@@ -263,7 +263,7 @@ The text of each license is also included at 
licenses/LICENSE-[project].txt.
      (New BSD license) Protocol Buffer Java API 
(org.spark-project.protobuf:protobuf-java:2.4.1-shaded - 
http://code.google.com/p/protobuf)
      (The BSD License) Fortran to Java ARPACK 
(net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
      (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - 
http://xmlenc.sourceforge.net)
-     (The New BSD License) Py4J (net.sf.py4j:py4j:0.9.2 - 
http://py4j.sourceforge.net/)
+     (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - 
http://py4j.sourceforge.net/)
      (Two-clause BSD-style license) JUnit-Interface 
(com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
      (BSD licence) sbt and sbt-launch-lib.bash
      (BSD 3 Clause) d3.min.js 
(https://github.com/mbostock/d3/blob/master/LICENSE)

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/bin/pyspark
----------------------------------------------------------------------
diff --git a/bin/pyspark b/bin/pyspark
index d1fe75a..396a07c 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -63,7 +63,7 @@ export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/bin/pyspark2.cmd
----------------------------------------------------------------------
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index cb78849..3e2ff10 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.9.2-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 8584b62..c985352 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -328,7 +328,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.9.2</version>
+      <version>0.10.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala 
b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 8bcd290..64cf498 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -32,7 +32,7 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", 
"pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", 
"py4j-0.9.2-src.zip").mkString(File.separator)
+      pythonPath += Seq(sparkHome, "python", "lib", 
"py4j-0.10.1-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/dev/deps/spark-deps-hadoop-2.2
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index 83bdd90..2477312 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -140,7 +140,7 @@ pmml-agent-1.2.7.jar
 pmml-model-1.2.7.jar
 pmml-schema-1.2.7.jar
 protobuf-java-2.5.0.jar
-py4j-0.9.2.jar
+py4j-0.10.1.jar
 pyrolite-4.9.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/dev/deps/spark-deps-hadoop-2.3
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 121e282..0181a47 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar
 pmml-model-1.2.7.jar
 pmml-schema-1.2.7.jar
 protobuf-java-2.5.0.jar
-py4j-0.9.2.jar
+py4j-0.10.1.jar
 pyrolite-4.9.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/dev/deps/spark-deps-hadoop-2.4
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 1d5ad27..f7ff234 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar
 pmml-model-1.2.7.jar
 pmml-schema-1.2.7.jar
 protobuf-java-2.5.0.jar
-py4j-0.9.2.jar
+py4j-0.10.1.jar
 pyrolite-4.9.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 909b94b..92db55d 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -155,7 +155,7 @@ pmml-agent-1.2.7.jar
 pmml-model-1.2.7.jar
 pmml-schema-1.2.7.jar
 protobuf-java-2.5.0.jar
-py4j-0.9.2.jar
+py4j-0.10.1.jar
 pyrolite-4.9.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 7507599..44b9b04 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -156,7 +156,7 @@ pmml-agent-1.2.7.jar
 pmml-model-1.2.7.jar
 pmml-schema-1.2.7.jar
 protobuf-java-2.5.0.jar
-py4j-0.9.2.jar
+py4j-0.10.1.jar
 pyrolite-4.9.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/python/docs/Makefile
----------------------------------------------------------------------
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 905e021..12e397e 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -7,7 +7,7 @@ SPHINXBUILD   ?= sphinx-build
 PAPER         ?=
 BUILDDIR      ?= _build
 
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.9.2-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.1-src.zip)
 
 # User-friendly check for sphinx-build
 ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/python/lib/py4j-0.10.1-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.10.1-src.zip b/python/lib/py4j-0.10.1-src.zip
new file mode 100644
index 0000000..a54bcae
Binary files /dev/null and b/python/lib/py4j-0.10.1-src.zip differ

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/python/lib/py4j-0.9.2-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.9.2-src.zip b/python/lib/py4j-0.9.2-src.zip
deleted file mode 100644
index 881bb75..0000000
Binary files a/python/lib/py4j-0.9.2-src.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/sbin/spark-config.sh
----------------------------------------------------------------------
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index 97df433..5f7bf41 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -27,4 +27,4 @@ fi
 export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:${PYTHONPATH}"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:${PYTHONPATH}"

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 7ea58af..3f6d7b2 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1138,9 +1138,9 @@ private[spark] class Client(
         val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
         require(pyArchivesFile.exists(),
           "pyspark.zip not found; cannot run pyspark application in YARN 
mode.")
-        val py4jFile = new File(pyLibPath, "py4j-0.9.2-src.zip")
+        val py4jFile = new File(pyLibPath, "py4j-0.10.1-src.zip")
         require(py4jFile.exists(),
-          "py4j-0.9.2-src.zip not found; cannot run pyspark application in 
YARN mode.")
+          "py4j-0.10.1-src.zip not found; cannot run pyspark application in 
YARN mode.")
         Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
       }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/beaf703d/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
----------------------------------------------------------------------
diff --git 
a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala 
b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 7df11ca..c465604 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -197,7 +197,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     // needed locations.
     val sparkHome = sys.props("spark.test.home")
     val pythonPath = Seq(
-        s"$sparkHome/python/lib/py4j-0.9.2-src.zip",
+        s"$sparkHome/python/lib/py4j-0.10.1-src.zip",
         s"$sparkHome/python")
     val extraEnv = Map(
       "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + 
_).mkString(File.pathSeparator),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to