Repository: spark Updated Branches: refs/heads/master bdff299f9 -> 382dbc12b
[SPARK-15061][PYSPARK] Upgrade to Py4J 0.10.1 ## What changes were proposed in this pull request? This upgrades to Py4J 0.10.1 which reduces syscal overhead in Java gateway ( see https://github.com/bartdag/py4j/issues/201 ). Related https://issues.apache.org/jira/browse/SPARK-6728 . ## How was this patch tested? Existing doctests & unit tests pass Author: Holden Karau <[email protected]> Closes #13064 from holdenk/SPARK-15061-upgrade-to-py4j-0.10.1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/382dbc12 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/382dbc12 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/382dbc12 Branch: refs/heads/master Commit: 382dbc12bb4b06871850a94f88018e76058a9b52 Parents: bdff299 Author: Holden Karau <[email protected]> Authored: Fri May 13 08:59:18 2016 +0100 Committer: Sean Owen <[email protected]> Committed: Fri May 13 08:59:18 2016 +0100 ---------------------------------------------------------------------- LICENSE | 2 +- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../org/apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2.2 | 2 +- dev/deps/spark-deps-hadoop-2.3 | 2 +- dev/deps/spark-deps-hadoop-2.4 | 2 +- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- python/docs/Makefile | 2 +- python/lib/py4j-0.10.1-src.zip | Bin 0 -> 61356 bytes python/lib/py4j-0.9.2-src.zip | Bin 55521 -> 0 bytes sbin/spark-config.sh | 2 +- .../org/apache/spark/deploy/yarn/Client.scala | 4 ++-- .../spark/deploy/yarn/YarnClusterSuite.scala | 2 +- 16 files changed, 15 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/LICENSE ---------------------------------------------------------------------- diff --git a/LICENSE b/LICENSE index 9714b3b..f403640 100644 --- a/LICENSE +++ b/LICENSE @@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) - (The New BSD License) Py4J (net.sf.py4j:py4j:0.9.2 - http://py4j.sourceforge.net/) + (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.1 - http://py4j.sourceforge.net/) (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) (BSD licence) sbt and sbt-launch-lib.bash (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/bin/pyspark ---------------------------------------------------------------------- diff --git a/bin/pyspark b/bin/pyspark index d1fe75a..396a07c 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -63,7 +63,7 @@ export PYSPARK_PYTHON # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/bin/pyspark2.cmd ---------------------------------------------------------------------- diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index cb78849..3e2ff10 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.9.2-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.1-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index 8584b62..c985352 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -328,7 +328,7 @@ <dependency> <groupId>net.sf.py4j</groupId> <artifactId>py4j</artifactId> - <version>0.9.2</version> + <version>0.10.1</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 8bcd290..64cf498 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -32,7 +32,7 @@ private[spark] object PythonUtils { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) - pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.9.2-src.zip").mkString(File.separator) + pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.1-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) pythonPath.mkString(File.pathSeparator) http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/dev/deps/spark-deps-hadoop-2.2 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index 83bdd90..2477312 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -140,7 +140,7 @@ pmml-agent-1.2.7.jar pmml-model-1.2.7.jar pmml-schema-1.2.7.jar protobuf-java-2.5.0.jar -py4j-0.9.2.jar +py4j-0.10.1.jar pyrolite-4.9.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/dev/deps/spark-deps-hadoop-2.3 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index 121e282..0181a47 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar pmml-model-1.2.7.jar pmml-schema-1.2.7.jar protobuf-java-2.5.0.jar -py4j-0.9.2.jar +py4j-0.10.1.jar pyrolite-4.9.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/dev/deps/spark-deps-hadoop-2.4 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index 1d5ad27..f7ff234 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -147,7 +147,7 @@ pmml-agent-1.2.7.jar pmml-model-1.2.7.jar pmml-schema-1.2.7.jar protobuf-java-2.5.0.jar -py4j-0.9.2.jar +py4j-0.10.1.jar pyrolite-4.9.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/dev/deps/spark-deps-hadoop-2.6 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 909b94b..92db55d 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -155,7 +155,7 @@ pmml-agent-1.2.7.jar pmml-model-1.2.7.jar pmml-schema-1.2.7.jar protobuf-java-2.5.0.jar -py4j-0.9.2.jar +py4j-0.10.1.jar pyrolite-4.9.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/dev/deps/spark-deps-hadoop-2.7 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 7507599..44b9b04 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -156,7 +156,7 @@ pmml-agent-1.2.7.jar pmml-model-1.2.7.jar pmml-schema-1.2.7.jar protobuf-java-2.5.0.jar -py4j-0.9.2.jar +py4j-0.10.1.jar pyrolite-4.9.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/python/docs/Makefile ---------------------------------------------------------------------- diff --git a/python/docs/Makefile b/python/docs/Makefile index 905e021..12e397e 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -7,7 +7,7 @@ SPHINXBUILD ?= sphinx-build PAPER ?= BUILDDIR ?= _build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.9.2-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.1-src.zip) # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/python/lib/py4j-0.10.1-src.zip ---------------------------------------------------------------------- diff --git a/python/lib/py4j-0.10.1-src.zip b/python/lib/py4j-0.10.1-src.zip new file mode 100644 index 0000000..a54bcae Binary files /dev/null and b/python/lib/py4j-0.10.1-src.zip differ http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/python/lib/py4j-0.9.2-src.zip ---------------------------------------------------------------------- diff --git a/python/lib/py4j-0.9.2-src.zip b/python/lib/py4j-0.9.2-src.zip deleted file mode 100644 index 881bb75..0000000 Binary files a/python/lib/py4j-0.9.2-src.zip and /dev/null differ http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/sbin/spark-config.sh ---------------------------------------------------------------------- diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index 97df433..5f7bf41 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -27,4 +27,4 @@ fi export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9.2-src.zip:${PYTHONPATH}" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.1-src.zip:${PYTHONPATH}" http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala ---------------------------------------------------------------------- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 7ea58af..3f6d7b2 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1138,9 +1138,9 @@ private[spark] class Client( val pyArchivesFile = new File(pyLibPath, "pyspark.zip") require(pyArchivesFile.exists(), "pyspark.zip not found; cannot run pyspark application in YARN mode.") - val py4jFile = new File(pyLibPath, "py4j-0.9.2-src.zip") + val py4jFile = new File(pyLibPath, "py4j-0.10.1-src.zip") require(py4jFile.exists(), - "py4j-0.9.2-src.zip not found; cannot run pyspark application in YARN mode.") + "py4j-0.10.1-src.zip not found; cannot run pyspark application in YARN mode.") Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath()) } } http://git-wip-us.apache.org/repos/asf/spark/blob/382dbc12/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala ---------------------------------------------------------------------- diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 7df11ca..c465604 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -197,7 +197,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { // needed locations. val sparkHome = sys.props("spark.test.home") val pythonPath = Seq( - s"$sparkHome/python/lib/py4j-0.9.2-src.zip", + s"$sparkHome/python/lib/py4j-0.10.1-src.zip", s"$sparkHome/python") val extraEnv = Map( "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator), --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
