This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2cf11cdb04f [SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py
2cf11cdb04f is described below
commit 2cf11cdb04f4c8628a991e50470331c3a8682bcd
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Jan 3 13:05:29 2023 +0900
[SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py
### What changes were proposed in this pull request?
This PR proposes to automatically reformat `python/setup.py` too.
### Why are the changes needed?
To make the development cycle easier.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
I manually checked via:
```bash
./dev/reformat-python
./dev/lint-python
```
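For reference, after this change both scripts pass `python/setup.py` to Black alongside the existing targets. A minimal sketch of the equivalent direct invocations, assuming Black is installed and callable as `black` (the scripts actually resolve it via `$BLACK_BUILD`):
```bash
# Check formatting only (what dev/lint-python does); exits non-zero on violations.
black --config dev/pyproject.toml --check python/pyspark dev python/setup.py

# Rewrite files in place (what dev/reformat-python does).
black --config dev/pyproject.toml python/pyspark dev python/setup.py
```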
Closes #39352 from HyukjinKwon/SPARK-41854.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
dev/lint-python | 2 +-
dev/reformat-python | 2 +-
python/setup.py | 240 ++++++++++++++++++++++++++++------------------------
3 files changed, 133 insertions(+), 111 deletions(-)
diff --git a/dev/lint-python b/dev/lint-python
index 59ce71980d9..f1f4e9f1070 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -220,7 +220,7 @@ function black_test {
fi
echo "starting black test..."
- BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev) 2>&1)
+ BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev python/setup.py) 2>&1)
BLACK_STATUS=$?
if [ "$BLACK_STATUS" -ne 0 ]; then
diff --git a/dev/reformat-python b/dev/reformat-python
index ae2118ab631..9543f5713d1 100755
--- a/dev/reformat-python
+++ b/dev/reformat-python
@@ -29,4 +29,4 @@ if [ $? -ne 0 ]; then
exit 1
fi
-$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev
+$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev python/setup.py
diff --git a/python/setup.py b/python/setup.py
index 54115359a60..faba203a53a 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -25,19 +25,23 @@ from setuptools.command.install import install
from shutil import copyfile, copytree, rmtree
try:
- exec(open('pyspark/version.py').read())
+ exec(open("pyspark/version.py").read())
except IOError:
- print("Failed to load PySpark version file for packaging. You must be in
Spark's python dir.",
- file=sys.stderr)
+ print(
+ "Failed to load PySpark version file for packaging. You must be in
Spark's python dir.",
+ file=sys.stderr,
+ )
sys.exit(-1)
try:
spec = importlib.util.spec_from_file_location("install", "pyspark/install.py")
install_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(install_module)
except IOError:
- print("Failed to load the installing module (pyspark/install.py) which had
to be "
- "packaged together.",
- file=sys.stderr)
+ print(
+ "Failed to load the installing module (pyspark/install.py) which had
to be "
+ "packaged together.",
+ file=sys.stderr,
+ )
sys.exit(-1)
VERSION = __version__ # noqa
# A temporary path so we can access above the Python project root and fetch scripts and jars we need
@@ -61,12 +65,16 @@ JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/"))
if len(JARS_PATH) == 1:
JARS_PATH = JARS_PATH[0]
-elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1):
+elif os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1:
# Release mode puts the jars in a jars directory
JARS_PATH = os.path.join(SPARK_HOME, "jars")
elif len(JARS_PATH) > 1:
- print("Assembly jars exist for multiple scalas ({0}), please cleanup
assembly/target".format(
- JARS_PATH), file=sys.stderr)
+ print(
+ "Assembly jars exist for multiple scalas ({0}), please cleanup
assembly/target".format(
+ JARS_PATH
+ ),
+ file=sys.stderr,
+ )
sys.exit(-1)
elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
print(incorrect_invocation_message, file=sys.stderr)
@@ -89,8 +97,9 @@ LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
# This is important because we only want to build the symlink farm while under Spark otherwise we
# want to use the symlink farm. And if the symlink farm exists under while under Spark (e.g. a
# partially built sdist) we should error and have the user sort it out.
-in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or
- (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1))
+in_spark = os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or (
+ os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1
+)
def _supports_symlinks():
@@ -98,13 +107,14 @@ def _supports_symlinks():
return getattr(os, "symlink", None) is not None
-if (in_spark):
+if in_spark:
# Construct links for setup
try:
os.mkdir(TEMP_PATH)
except BaseException:
- print("Temp path for symlink to parent already exists
{0}".format(TEMP_PATH),
- file=sys.stderr)
+ print(
+ "Temp path for symlink to parent already exists
{0}".format(TEMP_PATH), file=sys.stderr
+ )
sys.exit(-1)
# If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py
@@ -134,11 +144,13 @@ class InstallCommand(install):
spark_version, hadoop_version, hive_version = install_module.checked_versions(
os.environ.get("PYSPARK_VERSION", VERSION).lower(),
os.environ.get("PYSPARK_HADOOP_VERSION",
install_module.DEFAULT_HADOOP).lower(),
- os.environ.get("PYSPARK_HIVE_VERSION",
install_module.DEFAULT_HIVE).lower())
+ os.environ.get("PYSPARK_HIVE_VERSION",
install_module.DEFAULT_HIVE).lower(),
+ )
- if ("PYSPARK_VERSION" not in os.environ and
- ((install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE) ==
- (hadoop_version, hive_version))):
+ if "PYSPARK_VERSION" not in os.environ and (
+ (install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE)
+ == (hadoop_version, hive_version)
+ ):
# Do not download and install if they are same as default.
return
@@ -146,7 +158,8 @@ class InstallCommand(install):
dest=spark_dist,
spark_version=spark_version,
hadoop_version=hadoop_version,
- hive_version=hive_version)
+ hive_version=hive_version,
+ )
try:
@@ -160,7 +173,7 @@ try:
pass
copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py")
- if (in_spark):
+ if in_spark:
# Construct the symlink farm - this is necessary since we can't refer to the path above the
# package root and we need to copy the jars and scripts which are up above the python root.
if _supports_symlinks():
@@ -181,8 +194,10 @@ try:
else:
# If we are not inside of SPARK_HOME verify we have the required symlink farm
if not os.path.exists(JARS_TARGET):
- print("To build packaging must be in the python directory under
the SPARK_HOME.",
- file=sys.stderr)
+ print(
+ "To build packaging must be in the python directory under the
SPARK_HOME.",
+ file=sys.stderr,
+ )
if not os.path.isdir(SCRIPTS_TARGET):
print(incorrect_invocation_message, file=sys.stderr)
@@ -195,118 +210,125 @@ try:
# will search for SPARK_HOME with Python.
scripts.append("pyspark/find_spark_home.py")
- with open('README.md') as f:
+ with open("README.md") as f:
long_description = f.read()
setup(
- name='pyspark',
+ name="pyspark",
version=VERSION,
- description='Apache Spark Python API',
+ description="Apache Spark Python API",
long_description=long_description,
long_description_content_type="text/markdown",
- author='Spark Developers',
- author_email='[email protected]',
- url='https://github.com/apache/spark/tree/master/python',
- packages=['pyspark',
- 'pyspark.cloudpickle',
- 'pyspark.mllib',
- 'pyspark.mllib.linalg',
- 'pyspark.mllib.stat',
- 'pyspark.ml',
- 'pyspark.ml.linalg',
- 'pyspark.ml.param',
- 'pyspark.sql',
- 'pyspark.sql.avro',
- 'pyspark.sql.connect',
- 'pyspark.sql.connect.proto',
- 'pyspark.sql.pandas',
- 'pyspark.sql.protobuf',
- 'pyspark.sql.streaming',
- 'pyspark.streaming',
- 'pyspark.bin',
- 'pyspark.sbin',
- 'pyspark.jars',
- 'pyspark.pandas',
- 'pyspark.pandas.data_type_ops',
- 'pyspark.pandas.indexes',
- 'pyspark.pandas.missing',
- 'pyspark.pandas.plot',
- 'pyspark.pandas.spark',
- 'pyspark.pandas.typedef',
- 'pyspark.pandas.usage_logging',
- 'pyspark.python.pyspark',
- 'pyspark.python.lib',
- 'pyspark.data',
- 'pyspark.licenses',
- 'pyspark.resource',
- 'pyspark.examples.src.main.python'],
+ author="Spark Developers",
+ author_email="[email protected]",
+ url="https://github.com/apache/spark/tree/master/python",
+ packages=[
+ "pyspark",
+ "pyspark.cloudpickle",
+ "pyspark.mllib",
+ "pyspark.mllib.linalg",
+ "pyspark.mllib.stat",
+ "pyspark.ml",
+ "pyspark.ml.linalg",
+ "pyspark.ml.param",
+ "pyspark.sql",
+ "pyspark.sql.avro",
+ "pyspark.sql.connect",
+ "pyspark.sql.connect.proto",
+ "pyspark.sql.pandas",
+ "pyspark.sql.protobuf",
+ "pyspark.sql.streaming",
+ "pyspark.streaming",
+ "pyspark.bin",
+ "pyspark.sbin",
+ "pyspark.jars",
+ "pyspark.pandas",
+ "pyspark.pandas.data_type_ops",
+ "pyspark.pandas.indexes",
+ "pyspark.pandas.missing",
+ "pyspark.pandas.plot",
+ "pyspark.pandas.spark",
+ "pyspark.pandas.typedef",
+ "pyspark.pandas.usage_logging",
+ "pyspark.python.pyspark",
+ "pyspark.python.lib",
+ "pyspark.data",
+ "pyspark.licenses",
+ "pyspark.resource",
+ "pyspark.examples.src.main.python",
+ ],
include_package_data=True,
package_dir={
- 'pyspark.jars': 'deps/jars',
- 'pyspark.bin': 'deps/bin',
- 'pyspark.sbin': 'deps/sbin',
- 'pyspark.python.lib': 'lib',
- 'pyspark.data': 'deps/data',
- 'pyspark.licenses': 'deps/licenses',
- 'pyspark.examples.src.main.python': 'deps/examples',
+ "pyspark.jars": "deps/jars",
+ "pyspark.bin": "deps/bin",
+ "pyspark.sbin": "deps/sbin",
+ "pyspark.python.lib": "lib",
+ "pyspark.data": "deps/data",
+ "pyspark.licenses": "deps/licenses",
+ "pyspark.examples.src.main.python": "deps/examples",
},
package_data={
- 'pyspark.jars': ['*.jar'],
- 'pyspark.bin': ['*'],
- 'pyspark.sbin': ['spark-config.sh', 'spark-daemon.sh',
- 'start-history-server.sh',
- 'stop-history-server.sh', ],
- 'pyspark.python.lib': ['*.zip'],
- 'pyspark.data': ['*.txt', '*.data'],
- 'pyspark.licenses': ['*.txt'],
- 'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
+ "pyspark.jars": ["*.jar"],
+ "pyspark.bin": ["*"],
+ "pyspark.sbin": [
+ "spark-config.sh",
+ "spark-daemon.sh",
+ "start-history-server.sh",
+ "stop-history-server.sh",
+ ],
+ "pyspark.python.lib": ["*.zip"],
+ "pyspark.data": ["*.txt", "*.data"],
+ "pyspark.licenses": ["*.txt"],
+ "pyspark.examples.src.main.python": ["*.py", "*/*.py"],
+ },
scripts=scripts,
- license='http://www.apache.org/licenses/LICENSE-2.0',
+ license="http://www.apache.org/licenses/LICENSE-2.0",
# Don't forget to update python/docs/source/getting_started/install.rst
# if you're updating the versions or dependencies.
- install_requires=['py4j==0.10.9.7'],
+ install_requires=["py4j==0.10.9.7"],
extras_require={
- 'ml': ['numpy>=1.15'],
- 'mllib': ['numpy>=1.15'],
- 'sql': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'numpy>=1.15',
+ "ml": ["numpy>=1.15"],
+ "mllib": ["numpy>=1.15"],
+ "sql": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "numpy>=1.15",
],
- 'pandas_on_spark': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'numpy>=1.15',
+ "pandas_on_spark": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "numpy>=1.15",
],
- 'connect': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'grpcio>=%s' % _minimum_grpc_version,
- 'grpcio-status>=%s' % _minimum_grpc_version,
- 'googleapis-common-protos>=%s' % _minimum_googleapis_common_protos_version,
- 'numpy>=1.15',
+ "connect": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "grpcio>=%s" % _minimum_grpc_version,
+ "grpcio-status>=%s" % _minimum_grpc_version,
+ "googleapis-common-protos>=%s" %
_minimum_googleapis_common_protos_version,
+ "numpy>=1.15",
],
},
- python_requires='>=3.7',
+ python_requires=">=3.7",
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'License :: OSI Approved :: Apache Software License',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'Programming Language :: Python :: 3.11',
- 'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: PyPy',
- 'Typing :: Typed'],
+ "Development Status :: 5 - Production/Stable",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+ "Typing :: Typed",
+ ],
cmdclass={
- 'install': InstallCommand,
+ "install": InstallCommand,
},
)
finally:
# We only cleanup the symlink farm if we were in Spark, otherwise we are installing rather than
# packaging.
- if (in_spark):
+ if in_spark:
# Depending on cleaning up the symlink farm or copied version
if _supports_symlinks():
os.remove(os.path.join(TEMP_PATH, "jars"))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]