Repository: spark
Updated Branches:
  refs/heads/master b3af917e7 -> e4cb42ad8
[SPARK-25891][PYTHON] Upgrade to Py4J 0.10.8.1

## What changes were proposed in this pull request?

Py4J 0.10.8.1 was released on October 21st and is the first Py4J release to officially support Python 3.7, so we should upgrade to get that official support. It also includes several patches related to garbage collection.

https://www.py4j.org/changelog.html#py4j-0-10-8-and-py4j-0-10-8-1

## How was this patch tested?

Pass the Jenkins.

Closes #22901 from dongjoon-hyun/SPARK-25891.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e4cb42ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e4cb42ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e4cb42ad

Branch: refs/heads/master
Commit: e4cb42ad89307ebc5a1bd9660c86219340d71ff6
Parents: b3af917
Author: Dongjoon Hyun <[email protected]>
Authored: Wed Oct 31 09:55:03 2018 -0700
Committer: Dongjoon Hyun <[email protected]>
Committed: Wed Oct 31 09:55:03 2018 -0700

----------------------------------------------------------------------
 bin/pyspark                                        |   2 +-
 bin/pyspark2.cmd                                   |   2 +-
 core/pom.xml                                       |   2 +-
 .../org/apache/spark/api/python/PythonUtils.scala  |   3 ++-
 dev/deps/spark-deps-hadoop-2.7                     |   2 +-
 dev/deps/spark-deps-hadoop-3.1                     |   2 +-
 python/README.md                                   |   2 +-
 python/docs/Makefile                               |   2 +-
 python/lib/py4j-0.10.7-src.zip                     | Bin 42437 -> 0 bytes
 python/lib/py4j-0.10.8.1-src.zip                   | Bin 0 -> 41255 bytes
 python/setup.py                                    |   2 +-
 .../org/apache/spark/deploy/yarn/Client.scala      |   2 +-
 .../spark/deploy/yarn/YarnClusterSuite.scala       |   2 +-
 sbin/spark-config.sh                               |   2 +-
 14 files changed, 13 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/bin/pyspark
----------------------------------------------------------------------
diff --git a/bin/pyspark b/bin/pyspark
index 5d5affb..1dcddcc 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -57,7 +57,7 @@ export PYSPARK_PYTHON

 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:$PYTHONPATH"

 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/bin/pyspark2.cmd
----------------------------------------------------------------------
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index 15fa910..479fd46 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )

 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.8.1-src.zip;%PYTHONPATH%

 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index eff3aa1..f23d09f 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.7</version>
+      <version>0.10.8.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
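(Not part of the patch.) Since the launch scripts and the core POM above all pin the Py4J version, a quick local sanity check is to confirm which Py4J actually gets imported once one of the updated scripts has set PYTHONPATH; py4j exposes its release string as py4j.version.__version__. A minimal sketch, assuming the new zip is on the path:

    # Rough check, not included in this commit: confirm the Py4J picked up
    # from PYTHONPATH is the newly bundled 0.10.8.1.
    from py4j.version import __version__ as py4j_version

    if py4j_version != "0.10.8.1":
        raise RuntimeError("unexpected Py4J version: " + py4j_version)
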
http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index cdce371..b6b0cac 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -32,7 +32,8 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
+      pythonPath +=
+        Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 0703b5b..db84b85 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -168,7 +168,7 @@ parquet-hadoop-1.10.0.jar
 parquet-hadoop-bundle-1.6.0.jar
 parquet-jackson-1.10.0.jar
 protobuf-java-2.5.0.jar
-py4j-0.10.7.jar
+py4j-0.10.8.1.jar
 pyrolite-4.13.jar
 scala-compiler-2.11.12.jar
 scala-library-2.11.12.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/dev/deps/spark-deps-hadoop-3.1
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index 5139868..befb93d 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -186,7 +186,7 @@ parquet-hadoop-1.10.0.jar
 parquet-hadoop-bundle-1.6.0.jar
 parquet-jackson-1.10.0.jar
 protobuf-java-2.5.0.jar
-py4j-0.10.7.jar
+py4j-0.10.8.1.jar
 pyrolite-4.13.jar
 re2j-1.1.jar
 scala-compiler-2.11.12.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/python/README.md
----------------------------------------------------------------------
diff --git a/python/README.md b/python/README.md
index c020d84..ffb6147 100644
--- a/python/README.md
+++ b/python/README.md
@@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c

 ## Python Requirements

-At its core PySpark depends on Py4J (currently version 0.10.7), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).
+At its core PySpark depends on Py4J (currently version 0.10.8.1), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/python/docs/Makefile
----------------------------------------------------------------------
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 1ed1f33..4767fd9 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -37,7 +37,7 @@ BUILDDIR      ?= _build
 #   2. If both are set, SPHINXBUILD has a higher priority over SPHINXPYTHON
 #   3. By default, SPHINXBUILD is used as 'sphinx-build'.
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.7-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.8.1-src.zip)

 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/python/lib/py4j-0.10.7-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.10.7-src.zip b/python/lib/py4j-0.10.7-src.zip
deleted file mode 100644
index 128e321..0000000
Binary files a/python/lib/py4j-0.10.7-src.zip and /dev/null differ

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/python/lib/py4j-0.10.8.1-src.zip
----------------------------------------------------------------------
diff --git a/python/lib/py4j-0.10.8.1-src.zip b/python/lib/py4j-0.10.8.1-src.zip
new file mode 100644
index 0000000..1b5dede
Binary files /dev/null and b/python/lib/py4j-0.10.8.1-src.zip differ

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index c447f2d..7da67a4 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -201,7 +201,7 @@ try:
             'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
         scripts=scripts,
         license='http://www.apache.org/licenses/LICENSE-2.0',
-        install_requires=['py4j==0.10.7'],
+        install_requires=['py4j==0.10.8.1'],
         setup_requires=['pypandoc'],
         extras_require={
             'ml': ['numpy>=1.7'],

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 67d2c86..49b7f62 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1169,7 +1169,7 @@ private[spark] class Client(
         val pyArchivesFile = new File(pyLibPath, "pyspark.zip")
         require(pyArchivesFile.exists(),
           s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.")
-        val py4jFile = new File(pyLibPath, "py4j-0.10.7-src.zip")
+        val py4jFile = new File(pyLibPath, "py4j-0.10.8.1-src.zip")
         require(py4jFile.exists(),
           s"$py4jFile not found; cannot run pyspark application in YARN mode.")
         Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
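(Also not part of the patch.) PythonUtils.sparkPythonPath and the YARN Client above both build the same two-entry path, pyspark.zip plus the Py4J source zip under $SPARK_HOME/python/lib, and fail if either archive is missing. A rough Python sketch of that lookup, assuming SPARK_HOME is set as the scripts in this diff assume, can confirm a local checkout picked up the renamed archive:

    # Sketch mirroring the archive lookup in PythonUtils.sparkPythonPath and
    # yarn/Client.scala; assumes SPARK_HOME points at a Spark checkout/install.
    import os

    lib = os.path.join(os.environ["SPARK_HOME"], "python", "lib")
    archives = [os.path.join(lib, "pyspark.zip"),
                os.path.join(lib, "py4j-0.10.8.1-src.zip")]
    missing = [p for p in archives if not os.path.isfile(p)]
    if missing:
        raise SystemExit("missing archives: " + ", ".join(missing))
    print(os.pathsep.join(archives))  # entries that would be added to the Python path
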
http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 58d11e9..506b27c 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -265,7 +265,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     // needed locations.
     val sparkHome = sys.props("spark.test.home")
     val pythonPath = Seq(
-        s"$sparkHome/python/lib/py4j-0.10.7-src.zip",
+        s"$sparkHome/python/lib/py4j-0.10.8.1-src.zip",
         s"$sparkHome/python")
     val extraEnvVars = Map(
       "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),

http://git-wip-us.apache.org/repos/asf/spark/blob/e4cb42ad/sbin/spark-config.sh
----------------------------------------------------------------------
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index bf3da18..0771e2a 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
   export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:${PYTHONPATH}"
   export PYSPARK_PYTHONPATH_SET=1
 fi


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
