This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new fa60ea6  ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old
fa60ea6 is described below

commit fa60ea6ccaad1656c636f25ec0bef5f7efe88bca
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Thu Apr 23 16:21:44 2020 -0500

    ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old
    
    Closes #6992 from jorisvandenbossche/ARROW-7950-pandas-version
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 .github/workflows/python.yml        |  6 ++---
 dev/tasks/tasks.yml                 | 16 +++++++++++++
 python/pyarrow/pandas-shim.pxi      | 46 +++++++++++++++++++------------------
 python/pyarrow/tests/test_pandas.py | 13 +++++++----
 4 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index bf41cfb..f1a4ff3 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -83,7 +83,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: [3.6]
+        python: [3.8]
     env:
       PYTHON: ${{ matrix.python }}
     steps:
@@ -126,8 +126,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: [3.8]
-        pandas: ["latest"]
+        python: [3.6]
+        pandas: ["latest", "0.23"]
     env:
       PYTHON: ${{ matrix.python }}
       PANDAS: ${{ matrix.pandas }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index e4f3331..732e075 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -2026,6 +2026,22 @@ tasks:
       run:
         - conda-python-pandas
 
+  test-conda-python-3.6-pandas-0.23:
+    ci: circle
+    platform: linux
+    template: docker-tests/circle.linux.yml
+    params:
+      env:
+        PYTHON: 3.6
+        PANDAS: 0.23
+      build:
+        - conda-cpp
+        - conda-python
+      nocache:
+        - conda-python-pandas
+      run:
+        - conda-python-pandas
+
   test-conda-python-3.7-dask-latest:
     ci: circle
     platform: linux
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index 3ba86c8..edb7ff6 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -17,6 +17,9 @@
 
 # pandas lazy-loading API shim that reduces API call and import overhead
 
+import warnings
+
+
 cdef class _PandasAPIShim(object):
     """
     Lazy pandas importer that isolates usages of pandas APIs and avoids
@@ -55,39 +58,38 @@ cdef class _PandasAPIShim(object):
         from distutils.version import LooseVersion
         self._loose_version = LooseVersion(pd.__version__)
 
+        if self._loose_version < LooseVersion('0.23.0'):
+            self._have_pandas = False
+            if raise_:
+                raise ImportError(
+                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+                    "installed".format(self._version)
+                )
+            else:
+                warnings.warn(
+                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+                    "installed. Therefore, pandas-specific integration is not "
+                    "used.".format(self._version), stacklevel=2)
+                return
+
         self._compat_module = pdcompat
         self._data_frame = pd.DataFrame
         self._index = pd.Index
         self._categorical_type = pd.Categorical
         self._series = pd.Series
-        if self._loose_version >= LooseVersion('0.23.0'):
-            self._extension_array = pd.api.extensions.ExtensionArray
-            self._array_like_types = (
-                self._series, self._index, self._categorical_type,
-                self._extension_array)
-            self._extension_dtype = pd.api.extensions.ExtensionDtype
-        else:
-            self._extension_array = None
-            self._array_like_types = (
-                self._series, self._index, self._categorical_type)
-            self._extension_dtype = None
+        self._extension_array = pd.api.extensions.ExtensionArray
+        self._array_like_types = (
+            self._series, self._index, self._categorical_type,
+            self._extension_array)
+        self._extension_dtype = pd.api.extensions.ExtensionDtype
         if self._loose_version >= LooseVersion('0.24.0'):
             self._is_extension_array_dtype = \
                 pd.api.types.is_extension_array_dtype
         else:
             self._is_extension_array_dtype = None
 
-        if self._loose_version >= LooseVersion('0.20.0'):
-            from pandas.api.types import DatetimeTZDtype
-            self._types_api = pd.api.types
-        elif self._loose_version >= LooseVersion('0.19.0'):
-            from pandas.types.dtypes import DatetimeTZDtype
-            self._types_api = pd.api.types
-        else:
-            from pandas.types.dtypes import DatetimeTZDtype
-            self._types_api = pd.core.common
-
-        self._datetimetz_type = DatetimeTZDtype
+        self._types_api = pd.api.types
+        self._datetimetz_type = pd.api.types.DatetimeTZDtype
         self._have_pandas = True
 
         if self._loose_version > LooseVersion('0.25'):
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 2440f6f..ed8b052 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2675,8 +2675,8 @@ def test_convert_unsupported_type_error_message():
 
     df = pd.DataFrame({'a': [A(), A()]})
 
-    expected_msg = 'Conversion failed for column a with type object'
-    with pytest.raises(ValueError, match=expected_msg):
+    msg = 'Conversion failed for column a with type object'
+    with pytest.raises(ValueError, match=msg):
         pa.Table.from_pandas(df)
 
     # period unsupported for pandas <= 0.25
@@ -2685,8 +2685,8 @@ def test_convert_unsupported_type_error_message():
             'a': pd.period_range('2000-01-01', periods=20),
         })
 
-        expected_msg = 'Conversion failed for column a with type period'
-        with pytest.raises(TypeError, match=expected_msg):
+        msg = 'Conversion failed for column a with type (period|object)'
+        with pytest.raises((TypeError, ValueError), match=msg):
             pa.Table.from_pandas(df)
 
 
@@ -3560,7 +3560,7 @@ def test_array_protocol_pandas_extension_types(monkeypatch):
     # ARROW-7022 - ensure protocol works for Period / Interval extension dtypes
 
     if LooseVersion(pd.__version__) < '0.24.0':
-        pytest.skip(reason='Period/IntervalArray only introduced in 0.24')
+        pytest.skip('Period/IntervalArray only introduced in 0.24')
 
     storage = pa.array([1, 2, 3], type=pa.int64())
     expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
@@ -3654,6 +3654,9 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
     # converting extension type to linked pandas ExtensionDtype/Array
     import pandas.core.internals as _int
 
+    if LooseVersion(pd.__version__) < "0.24.0":
+        pytest.skip("ExtensionDtype introduced in pandas 0.24")
+
     storage = pa.array([1, 2, 3, 4], pa.int64())
     arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
     table = pa.table({'a': arr})

Reply via email to