This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fa60ea6 ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old
fa60ea6 is described below
commit fa60ea6ccaad1656c636f25ec0bef5f7efe88bca
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Thu Apr 23 16:21:44 2020 -0500
ARROW-7950: [Python] Determine + test minimal pandas version + raise error when pandas is too old
Closes #6992 from jorisvandenbossche/ARROW-7950-pandas-version
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
.github/workflows/python.yml | 6 ++---
dev/tasks/tasks.yml | 16 +++++++++++++
python/pyarrow/pandas-shim.pxi | 46 +++++++++++++++++++------------------
python/pyarrow/tests/test_pandas.py | 13 +++++++----
4 files changed, 51 insertions(+), 30 deletions(-)
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index bf41cfb..f1a4ff3 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -83,7 +83,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python: [3.6]
+ python: [3.8]
env:
PYTHON: ${{ matrix.python }}
steps:
@@ -126,8 +126,8 @@ jobs:
strategy:
fail-fast: false
matrix:
- python: [3.8]
- pandas: ["latest"]
+ python: [3.6]
+ pandas: ["latest", "0.23"]
env:
PYTHON: ${{ matrix.python }}
PANDAS: ${{ matrix.pandas }}
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index e4f3331..732e075 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -2026,6 +2026,22 @@ tasks:
run:
- conda-python-pandas
+ test-conda-python-3.6-pandas-0.23:
+ ci: circle
+ platform: linux
+ template: docker-tests/circle.linux.yml
+ params:
+ env:
+ PYTHON: 3.6
+ PANDAS: 0.23
+ build:
+ - conda-cpp
+ - conda-python
+ nocache:
+ - conda-python-pandas
+ run:
+ - conda-python-pandas
+
test-conda-python-3.7-dask-latest:
ci: circle
platform: linux
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index 3ba86c8..edb7ff6 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -17,6 +17,9 @@
# pandas lazy-loading API shim that reduces API call and import overhead
+import warnings
+
+
cdef class _PandasAPIShim(object):
"""
Lazy pandas importer that isolates usages of pandas APIs and avoids
@@ -55,39 +58,38 @@ cdef class _PandasAPIShim(object):
from distutils.version import LooseVersion
self._loose_version = LooseVersion(pd.__version__)
+ if self._loose_version < LooseVersion('0.23.0'):
+ self._have_pandas = False
+ if raise_:
+ raise ImportError(
+ "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+ "installed".format(self._version)
+ )
+ else:
+ warnings.warn(
+ "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+ "installed. Therefore, pandas-specific integration is not "
+ "used.".format(self._version), stacklevel=2)
+ return
+
self._compat_module = pdcompat
self._data_frame = pd.DataFrame
self._index = pd.Index
self._categorical_type = pd.Categorical
self._series = pd.Series
- if self._loose_version >= LooseVersion('0.23.0'):
- self._extension_array = pd.api.extensions.ExtensionArray
- self._array_like_types = (
- self._series, self._index, self._categorical_type,
- self._extension_array)
- self._extension_dtype = pd.api.extensions.ExtensionDtype
- else:
- self._extension_array = None
- self._array_like_types = (
- self._series, self._index, self._categorical_type)
- self._extension_dtype = None
+ self._extension_array = pd.api.extensions.ExtensionArray
+ self._array_like_types = (
+ self._series, self._index, self._categorical_type,
+ self._extension_array)
+ self._extension_dtype = pd.api.extensions.ExtensionDtype
if self._loose_version >= LooseVersion('0.24.0'):
self._is_extension_array_dtype = \
pd.api.types.is_extension_array_dtype
else:
self._is_extension_array_dtype = None
- if self._loose_version >= LooseVersion('0.20.0'):
- from pandas.api.types import DatetimeTZDtype
- self._types_api = pd.api.types
- elif self._loose_version >= LooseVersion('0.19.0'):
- from pandas.types.dtypes import DatetimeTZDtype
- self._types_api = pd.api.types
- else:
- from pandas.types.dtypes import DatetimeTZDtype
- self._types_api = pd.core.common
-
- self._datetimetz_type = DatetimeTZDtype
+ self._types_api = pd.api.types
+ self._datetimetz_type = pd.api.types.DatetimeTZDtype
self._have_pandas = True
if self._loose_version > LooseVersion('0.25'):
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 2440f6f..ed8b052 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2675,8 +2675,8 @@ def test_convert_unsupported_type_error_message():
df = pd.DataFrame({'a': [A(), A()]})
- expected_msg = 'Conversion failed for column a with type object'
- with pytest.raises(ValueError, match=expected_msg):
+ msg = 'Conversion failed for column a with type object'
+ with pytest.raises(ValueError, match=msg):
pa.Table.from_pandas(df)
# period unsupported for pandas <= 0.25
@@ -2685,8 +2685,8 @@ def test_convert_unsupported_type_error_message():
'a': pd.period_range('2000-01-01', periods=20),
})
- expected_msg = 'Conversion failed for column a with type period'
- with pytest.raises(TypeError, match=expected_msg):
+ msg = 'Conversion failed for column a with type (period|object)'
+ with pytest.raises((TypeError, ValueError), match=msg):
pa.Table.from_pandas(df)
@@ -3560,7 +3560,7 @@ def test_array_protocol_pandas_extension_types(monkeypatch):
# ARROW-7022 - ensure protocol works for Period / Interval extension dtypes
if LooseVersion(pd.__version__) < '0.24.0':
- pytest.skip(reason='Period/IntervalArray only introduced in 0.24')
+ pytest.skip('Period/IntervalArray only introduced in 0.24')
storage = pa.array([1, 2, 3], type=pa.int64())
expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
@@ -3654,6 +3654,9 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# converting extension type to linked pandas ExtensionDtype/Array
import pandas.core.internals as _int
+ if LooseVersion(pd.__version__) < "0.24.0":
+ pytest.skip("ExtensionDtype introduced in pandas 0.24")
+
storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
table = pa.table({'a': arr})