This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2cf11cdb04f [SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py
2cf11cdb04f is described below
commit 2cf11cdb04f4c8628a991e50470331c3a8682bcd
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Jan 3 13:05:29 2023 +0900
[SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py
### What changes were proposed in this pull request?
This PR proposes to automatically reformat `python/setup.py` too.
### Why are the changes needed?
To make the development cycle easier.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
I manually checked via:
```bash
./dev/reformat-python
./dev/lint-python
```
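For reference, after this change both scripts pass `python/setup.py` to Black alongside the existing targets. A minimal sketch of the equivalent direct invocations, assuming Black is installed and callable as `black` (the scripts actually resolve it via `$BLACK_BUILD`):
```bash
# Check formatting only (what dev/lint-python does); exits non-zero on violations.
black --config dev/pyproject.toml --check python/pyspark dev python/setup.py

# Rewrite files in place (what dev/reformat-python does).
black --config dev/pyproject.toml python/pyspark dev python/setup.py
```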
Closes #39352 from HyukjinKwon/SPARK-41854.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
dev/lint-python | 2 +-
dev/reformat-python | 2 +-
python/setup.py | 240 ++++++++++++++++++++++++++++------------------------
3 files changed, 133 insertions(+), 111 deletions(-)
diff --git a/dev/lint-python b/dev/lint-python
index 59ce71980d9..f1f4e9f1070 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -220,7 +220,7 @@ function black_test {
fi
echo "starting black test..."
- BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev) 2>&1)
+ BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev python/setup.py) 2>&1)
BLACK_STATUS=$?
if [ "$BLACK_STATUS" -ne 0 ]; then
diff --git a/dev/reformat-python b/dev/reformat-python
index ae2118ab631..9543f5713d1 100755
--- a/dev/reformat-python
+++ b/dev/reformat-python
@@ -29,4 +29,4 @@ if [ $? -ne 0 ]; then
exit 1
fi
-$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev
+$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev python/setup.py
diff --git a/python/setup.py b/python/setup.py
index 54115359a60..faba203a53a 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -25,19 +25,23 @@ from setuptools.command.install import install
from shutil import copyfile, copytree, rmtree
try:
- exec(open('pyspark/version.py').read())
+ exec(open("pyspark/version.py").read())
except IOError:
- print("Failed to load PySpark version file for packaging. You must be in
Spark's python dir.",
- file=sys.stderr)
+ print(
+ "Failed to load PySpark version file for packaging. You must be in
Spark's python dir.",
+ file=sys.stderr,
+ )
sys.exit(-1)
try:
spec = importlib.util.spec_from_file_location("install", "pyspark/install.py")
install_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(install_module)
except IOError:
- print("Failed to load the installing module (pyspark/install.py) which had
to be "
- "packaged together.",
- file=sys.stderr)
+ print(
+ "Failed to load the installing module (pyspark/install.py) which had
to be "
+ "packaged together.",
+ file=sys.stderr,
+ )
sys.exit(-1)
VERSION = __version__ # noqa
# A temporary path so we can access above the Python project root and fetch scripts and jars we need
@@ -61,12 +65,16 @@ JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/"))
if len(JARS_PATH) == 1:
JARS_PATH = JARS_PATH[0]
-elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1):
+elif os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1:
# Release mode puts the jars in a jars directory
JARS_PATH = os.path.join(SPARK_HOME, "jars")
elif len(JARS_PATH) > 1:
- print("Assembly jars exist for multiple scalas ({0}), please cleanup
assembly/target".format(
- JARS_PATH), file=sys.stderr)
+ print(
+ "Assembly jars exist for multiple scalas ({0}), please cleanup
assembly/target".format(
+ JARS_PATH
+ ),
+ file=sys.stderr,
+ )
sys.exit(-1)
elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
print(incorrect_invocation_message, file=sys.stderr)
@@ -89,8 +97,9 @@ LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
# This is important because we only want to build the symlink farm while under Spark otherwise we
# want to use the symlink farm. And if the symlink farm exists under while under Spark (e.g. a
# partially built sdist) we should error and have the user sort it out.
-in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or
- (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1))
+in_spark = os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or (
+ os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1
+)
def _supports_symlinks():
@@ -98,13 +107,14 @@ def _supports_symlinks():
return getattr(os, "symlink", None) is not None
-if (in_spark):
+if in_spark:
# Construct links for setup
try:
os.mkdir(TEMP_PATH)
except BaseException:
- print("Temp path for symlink to parent already exists
{0}".format(TEMP_PATH),
- file=sys.stderr)
+ print(
+ "Temp path for symlink to parent already exists
{0}".format(TEMP_PATH), file=sys.stderr
+ )
sys.exit(-1)
# If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py
@@ -134,11 +144,13 @@ class InstallCommand(install):
spark_version, hadoop_version, hive_version = install_module.checked_versions(
os.environ.get("PYSPARK_VERSION", VERSION).lower(),
os.environ.get("PYSPARK_HADOOP_VERSION",
install_module.DEFAULT_HADOOP).lower(),
- os.environ.get("PYSPARK_HIVE_VERSION",
install_module.DEFAULT_HIVE).lower())
+ os.environ.get("PYSPARK_HIVE_VERSION",
install_module.DEFAULT_HIVE).lower(),
+ )
- if ("PYSPARK_VERSION" not in os.environ and
- ((install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE) ==
- (hadoop_version, hive_version))):
+ if "PYSPARK_VERSION" not in os.environ and (
+ (install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE)
+ == (hadoop_version, hive_version)
+ ):
# Do not download and install if they are same as default.
return
@@ -146,7 +158,8 @@ class InstallCommand(install):
dest=spark_dist,
spark_version=spark_version,
hadoop_version=hadoop_version,
- hive_version=hive_version)
+ hive_version=hive_version,
+ )
try:
@@ -160,7 +173,7 @@ try:
pass
copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py")
- if (in_spark):
+ if in_spark:
# Construct the symlink farm - this is necessary since we can't refer to the path above the
# package root and we need to copy the jars and scripts which are up above the python root.
if _supports_symlinks():
@@ -181,8 +194,10 @@ try:
else:
# If we are not inside of SPARK_HOME verify we have the required symlink farm
if not os.path.exists(JARS_TARGET):
- print("To build packaging must be in the python directory under
the SPARK_HOME.",
- file=sys.stderr)
+ print(
+ "To build packaging must be in the python directory under the
SPARK_HOME.",
+ file=sys.stderr,
+ )
if not os.path.isdir(SCRIPTS_TARGET):
print(incorrect_invocation_message, file=sys.stderr)
@@ -195,118 +210,125 @@ try:
# will search for SPARK_HOME with Python.
scripts.append("pyspark/find_spark_home.py")
- with open('README.md') as f:
+ with open("README.md") as f:
long_description = f.read()
setup(
- name='pyspark',
+ name="pyspark",
version=VERSION,
- description='Apache Spark Python API',
+ description="Apache Spark Python API",
long_description=long_description,
long_description_content_type="text/markdown",
- author='Spark Developers',
- author_email='[email protected]',
- url='https://github.com/apache/spark/tree/master/python',
- packages=['pyspark',
- 'pyspark.cloudpickle',
- 'pyspark.mllib',
- 'pyspark.mllib.linalg',
- 'pyspark.mllib.stat',
- 'pyspark.ml',
- 'pyspark.ml.linalg',
- 'pyspark.ml.param',
- 'pyspark.sql',
- 'pyspark.sql.avro',
- 'pyspark.sql.connect',
- 'pyspark.sql.connect.proto',
- 'pyspark.sql.pandas',
- 'pyspark.sql.protobuf',
- 'pyspark.sql.streaming',
- 'pyspark.streaming',
- 'pyspark.bin',
- 'pyspark.sbin',
- 'pyspark.jars',
- 'pyspark.pandas',
- 'pyspark.pandas.data_type_ops',
- 'pyspark.pandas.indexes',
- 'pyspark.pandas.missing',
- 'pyspark.pandas.plot',
- 'pyspark.pandas.spark',
- 'pyspark.pandas.typedef',
- 'pyspark.pandas.usage_logging',
- 'pyspark.python.pyspark',
- 'pyspark.python.lib',
- 'pyspark.data',
- 'pyspark.licenses',
- 'pyspark.resource',
- 'pyspark.examples.src.main.python'],
+ author="Spark Developers",
+ author_email="[email protected]",
+ url="https://github.com/apache/spark/tree/master/python",
+ packages=[
+ "pyspark",
+ "pyspark.cloudpickle",
+ "pyspark.mllib",
+ "pyspark.mllib.linalg",
+ "pyspark.mllib.stat",
+ "pyspark.ml",
+ "pyspark.ml.linalg",
+ "pyspark.ml.param",
+ "pyspark.sql",
+ "pyspark.sql.avro",
+ "pyspark.sql.connect",
+ "pyspark.sql.connect.proto",
+ "pyspark.sql.pandas",
+ "pyspark.sql.protobuf",
+ "pyspark.sql.streaming",
+ "pyspark.streaming",
+ "pyspark.bin",
+ "pyspark.sbin",
+ "pyspark.jars",
+ "pyspark.pandas",
+ "pyspark.pandas.data_type_ops",
+ "pyspark.pandas.indexes",
+ "pyspark.pandas.missing",
+ "pyspark.pandas.plot",
+ "pyspark.pandas.spark",
+ "pyspark.pandas.typedef",
+ "pyspark.pandas.usage_logging",
+ "pyspark.python.pyspark",
+ "pyspark.python.lib",
+ "pyspark.data",
+ "pyspark.licenses",
+ "pyspark.resource",
+ "pyspark.examples.src.main.python",
+ ],
include_package_data=True,
package_dir={
- 'pyspark.jars': 'deps/jars',
- 'pyspark.bin': 'deps/bin',
- 'pyspark.sbin': 'deps/sbin',
- 'pyspark.python.lib': 'lib',
- 'pyspark.data': 'deps/data',
- 'pyspark.licenses': 'deps/licenses',
- 'pyspark.examples.src.main.python': 'deps/examples',
+ "pyspark.jars": "deps/jars",
+ "pyspark.bin": "deps/bin",
+ "pyspark.sbin": "deps/sbin",
+ "pyspark.python.lib": "lib",
+ "pyspark.data": "deps/data",
+ "pyspark.licenses": "deps/licenses",
+ "pyspark.examples.src.main.python": "deps/examples",
},
package_data={
- 'pyspark.jars': ['*.jar'],
- 'pyspark.bin': ['*'],
- 'pyspark.sbin': ['spark-config.sh', 'spark-daemon.sh',
- 'start-history-server.sh',
- 'stop-history-server.sh', ],
- 'pyspark.python.lib': ['*.zip'],
- 'pyspark.data': ['*.txt', '*.data'],
- 'pyspark.licenses': ['*.txt'],
- 'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
+ "pyspark.jars": ["*.jar"],
+ "pyspark.bin": ["*"],
+ "pyspark.sbin": [
+ "spark-config.sh",
+ "spark-daemon.sh",
+ "start-history-server.sh",
+ "stop-history-server.sh",
+ ],
+ "pyspark.python.lib": ["*.zip"],
+ "pyspark.data": ["*.txt", "*.data"],
+ "pyspark.licenses": ["*.txt"],
+ "pyspark.examples.src.main.python": ["*.py", "*/*.py"],
+ },
scripts=scripts,
- license='http://www.apache.org/licenses/LICENSE-2.0',
+ license="http://www.apache.org/licenses/LICENSE-2.0",
# Don't forget to update python/docs/source/getting_started/install.rst
# if you're updating the versions or dependencies.
- install_requires=['py4j==0.10.9.7'],
+ install_requires=["py4j==0.10.9.7"],
extras_require={
- 'ml': ['numpy>=1.15'],
- 'mllib': ['numpy>=1.15'],
- 'sql': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'numpy>=1.15',
+ "ml": ["numpy>=1.15"],
+ "mllib": ["numpy>=1.15"],
+ "sql": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "numpy>=1.15",
],
- 'pandas_on_spark': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'numpy>=1.15',
+ "pandas_on_spark": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "numpy>=1.15",
],
- 'connect': [
- 'pandas>=%s' % _minimum_pandas_version,
- 'pyarrow>=%s' % _minimum_pyarrow_version,
- 'grpcio>=%s' % _minimum_grpc_version,
- 'grpcio-status>=%s' % _minimum_grpc_version,
- 'googleapis-common-protos>=%s' % _minimum_googleapis_common_protos_version,
- 'numpy>=1.15',
+ "connect": [
+ "pandas>=%s" % _minimum_pandas_version,
+ "pyarrow>=%s" % _minimum_pyarrow_version,
+ "grpcio>=%s" % _minimum_grpc_version,
+ "grpcio-status>=%s" % _minimum_grpc_version,
+ "googleapis-common-protos>=%s" %
_minimum_googleapis_common_protos_version,
+ "numpy>=1.15",
],
},
- python_requires='>=3.7',
+ python_requires=">=3.7",
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'License :: OSI Approved :: Apache Software License',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3.10',
- 'Programming Language :: Python :: 3.11',
- 'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: PyPy',
- 'Typing :: Typed'],
+ "Development Status :: 5 - Production/Stable",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+ "Typing :: Typed",
+ ],
cmdclass={
- 'install': InstallCommand,
+ "install": InstallCommand,
},
)
finally:
# We only cleanup the symlink farm if we were in Spark, otherwise we are installing rather than
# packaging.
- if (in_spark):
+ if in_spark:
# Depending on cleaning up the symlink farm or copied version
if _supports_symlinks():
os.remove(os.path.join(TEMP_PATH, "jars"))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]