(spark) branch master updated: [SPARK-46254][PYTHON] Remove stale Python 3.8/3.7 version checking

gurwls223 Mon, 04 Dec 2023 18:45:04 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new f3e113b687be [SPARK-46254][PYTHON] Remove stale Python 3.8/3.7 version checking
f3e113b687be is described below

commit f3e113b687be328ba0d318ec03977aba3036b86f
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Dec 5 11:44:37 2023 +0900

    [SPARK-46254][PYTHON] Remove stale Python 3.8/3.7 version checking
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to remove stale Python 3.8/3.7 version checking in the codebase.
    
    ### Why are the changes needed?
    
    To remove unnecessary version comparisons.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manually tested the version comparison.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #44169 from HyukjinKwon/remove-python38.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/pandas/__init__.py                   | 21 ++++++++++-----------
 python/pyspark/pandas/frame.py                      |  4 +---
 .../pandas/tests/computation/test_apply_func.py     |  5 +++--
 python/pyspark/pandas/tests/test_typedef.py         |  6 ++++--
 python/pyspark/pandas/typedef/typehints.py          |  7 +++++--
 5 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/python/pyspark/pandas/__init__.py 
b/python/pyspark/pandas/__init__.py
index 44365c8e6b88..65366f544092 100644
--- a/python/pyspark/pandas/__init__.py
+++ b/python/pyspark/pandas/__init__.py
@@ -122,17 +122,16 @@ def _auto_patch_pandas() -> None:
     _frame_has_class_getitem = hasattr(pd.DataFrame, "__class_getitem__")
     _series_has_class_getitem = hasattr(pd.Series, "__class_getitem__")
 
-    if sys.version_info >= (3, 7):
-        # Just in case pandas implements '__class_getitem__' later.
-        if not _frame_has_class_getitem:
-            pd.DataFrame.__class_getitem__ = (  # type: ignore[attr-defined]
-                lambda params: DataFrame.__class_getitem__(params)
-            )
-
-        if not _series_has_class_getitem:
-            pd.Series.__class_getitem__ = (  # type: ignore[attr-defined]
-                lambda params: Series.__class_getitem__(params)
-            )
+    # Just in case pandas implements '__class_getitem__' later.
+    if not _frame_has_class_getitem:
+        pd.DataFrame.__class_getitem__ = (  # type: ignore[attr-defined]
+            lambda params: DataFrame.__class_getitem__(params)
+        )
+
+    if not _series_has_class_getitem:
+        pd.Series.__class_getitem__ = (  # type: ignore[attr-defined]
+            lambda params: Series.__class_getitem__(params)
+        )
 
 
 _auto_patch_spark()
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index a54316dffeb4..9846dc0ae10b 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -2103,9 +2103,7 @@ class DataFrame(Frame, Generic[T]):
             v = [row[c] for c in data_spark_column_names]
             return k, v
 
-        can_return_named_tuples = sys.version_info >= (3, 7) or 
len(self.columns) + index < 255
-
-        if name is not None and can_return_named_tuples:
+        if name is not None:
             itertuple = namedtuple(name, fields, rename=True)  # type: 
ignore[misc]
             for k, v in map(
                 extract_kv_from_spark_row,
diff --git a/python/pyspark/pandas/tests/computation/test_apply_func.py 
b/python/pyspark/pandas/tests/computation/test_apply_func.py
index 93d9d56a479a..00b14441991a 100644
--- a/python/pyspark/pandas/tests/computation/test_apply_func.py
+++ b/python/pyspark/pandas/tests/computation/test_apply_func.py
@@ -23,6 +23,7 @@ import numpy as np
 import pandas as pd
 
 from pyspark import pandas as ps
+from pyspark.loose_version import LooseVersion
 from pyspark.pandas.config import option_context
 from pyspark.testing.pandasutils import ComparisonTestBase
 from pyspark.testing.sqlutils import SQLTestUtils
@@ -252,8 +253,8 @@ class FrameApplyFunctionMixin:
         actual.columns = ["a", "b"]
         self.assert_eq(actual, pdf)
 
-        # For NumPy typing, NumPy version should be 1.21+ and Python version 
should be 3.8+
-        if sys.version_info >= (3, 8):
+        # For NumPy typing, NumPy version should be 1.21+
+        if LooseVersion(np.__version__) >= LooseVersion("1.21"):
             import numpy.typing as ntp
 
             psdf = ps.from_pandas(pdf)
diff --git a/python/pyspark/pandas/tests/test_typedef.py 
b/python/pyspark/pandas/tests/test_typedef.py
index 52913fb65f09..e8095ce4ba06 100644
--- a/python/pyspark/pandas/tests/test_typedef.py
+++ b/python/pyspark/pandas/tests/test_typedef.py
@@ -25,6 +25,8 @@ import pandas
 import pandas as pd
 from pandas.api.types import CategoricalDtype
 import numpy as np
+
+from pyspark.loose_version import LooseVersion
 from pyspark.sql.types import (
     ArrayType,
     BinaryType,
@@ -360,8 +362,8 @@ class TypeHintTestsMixin:
                 (np.dtype("object"), ArrayType(spark_type)),
             )
 
-            # For NumPy typing, NumPy version should be 1.21+ and Python 
version should be 3.8+
-            if sys.version_info >= (3, 8):
+            # For NumPy typing, NumPy version should be 1.21+
+            if LooseVersion(np.__version__) >= LooseVersion("1.21"):
                 import numpy.typing as ntp
 
                 self.assertEqual(
diff --git a/python/pyspark/pandas/typedef/typehints.py 
b/python/pyspark/pandas/typedef/typehints.py
index bb0f70ee9245..15d6b443634a 100644
--- a/python/pyspark/pandas/typedef/typehints.py
+++ b/python/pyspark/pandas/typedef/typehints.py
@@ -31,6 +31,7 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype, pandas_dtype  # type: 
ignore[attr-defined]
 from pandas.api.extensions import ExtensionDtype
 
+
 extension_dtypes: Tuple[type, ...]
 try:
     from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype
@@ -147,8 +148,10 @@ def as_spark_type(
     - dictionaries of field_name -> type
     - Python3's typing system
     """
-    # For NumPy typing, NumPy version should be 1.21+ and Python version 
should be 3.8+
-    if sys.version_info >= (3, 8):
+    from pyspark.loose_version import LooseVersion
+
+    # For NumPy typing, NumPy version should be 1.21+
+    if LooseVersion(np.__version__) >= LooseVersion("1.21"):
         if (
             hasattr(tpe, "__origin__")
             and tpe.__origin__ is np.ndarray  # type: ignore[union-attr]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-46254][PYTHON] Remove stale Python 3.8/3.7 version checking

Reply via email to