[arrow] branch main updated: GH-37574: [Python] Compatibilty with numpy 2.0 (#38040)

jorisvandenbossche Thu, 05 Oct 2023 09:24:51 -0700

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 1eec38d833 GH-37574: [Python] Compatibilty with numpy 2.0 (#38040)
1eec38d833 is described below

commit 1eec38d833ca57ad826cea57f85d68532172c88c
Author: Thomas Grainger <[email protected]>
AuthorDate: Thu Oct 5 09:24:40 2023 -0700

    GH-37574: [Python] Compatibilty with numpy 2.0 (#38040)
    
    
    
    ### What changes are included in this PR?
    
    support for numpy 2
    install numpy 2 and pandas 2.2.0.dev0 from scientific-python-nightly-wheels
    
    ### Are these changes tested?
    I tested this locally with numpy==2.0.0.dev0 and pandas==2.2.0.dev0+325.g6c58a217f5
    
    ### Are there any user-facing changes?
    No
    
    * Closes: #37574
    
    Authored-by: Thomas Grainger <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 ci/scripts/install_pandas.sh         |  4 ++--
 python/pyarrow/pandas_compat.py      |  7 ++++---
 python/pyarrow/tests/test_pandas.py  | 22 +++++++++++++++-------
 python/pyarrow/tests/test_scalars.py |  4 +++-
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh
index f0cb76fb66..6a506a8651 100755
--- a/ci/scripts/install_pandas.sh
+++ b/ci/scripts/install_pandas.sh
@@ -28,7 +28,7 @@ pandas=$1
 numpy=${2:-"latest"}
 
 if [ "${numpy}" = "nightly" ]; then
-  pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
+  pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre numpy
 elif [ "${numpy}" = "latest" ]; then
   pip install numpy
 else
@@ -38,7 +38,7 @@ fi
 if [ "${pandas}" = "upstream_devel" ]; then
   pip install git+https://github.com/pandas-dev/pandas.git
 elif [ "${pandas}" = "nightly" ]; then
-  pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
+  pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas
 elif [ "${pandas}" = "latest" ]; then
   pip install pandas
 else
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 4e5c868efd..e232603ba4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -31,6 +31,7 @@ import re
 import warnings
 
 import numpy as np
+from numpy.core.numerictypes import sctypes as _np_sctypes
 
 import pyarrow as pa
 from pyarrow.lib import _pandas_api, frombytes  # noqa
@@ -98,7 +99,7 @@ _numpy_logical_type_map = {
     np.float32: 'float32',
     np.float64: 'float64',
     'datetime64[D]': 'date',
-    np.unicode_: 'string',
+    np.str_: 'string',
     np.bytes_: 'bytes',
 }
 
@@ -780,7 +781,7 @@ def table_to_blockmanager(options, table, categories=None,
 # dataframe (complex not included since not supported by Arrow)
 _pandas_supported_numpy_types = {
     str(np.dtype(typ))
-    for typ in (np.sctypes['int'] + np.sctypes['uint'] + np.sctypes['float'] +
+    for typ in (_np_sctypes['int'] + _np_sctypes['uint'] + _np_sctypes['float'] +
                 ['object', 'bool'])
 }
 
@@ -1010,7 +1011,7 @@ _pandas_logical_type_map = {
     'date': 'datetime64[D]',
     'datetime': 'datetime64[ns]',
     'datetimetz': 'datetime64[ns]',
-    'unicode': np.unicode_,
+    'unicode': np.str_,
     'bytes': np.bytes_,
     'string': np.str_,
     'integer': np.int64,
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 67502af443..0d01928f44 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -50,6 +50,14 @@ except ImportError:
     pass
 
 
+try:
+    _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
+except AttributeError:
+    from numpy.exceptions import (
+        VisibleDeprecationWarning as _np_VisibleDeprecationWarning
+    )
+
+
 # Marks all of the tests in this module
 pytestmark = pytest.mark.pandas
 
@@ -706,7 +714,7 @@ class TestConvertPrimitiveTypes:
 
     def test_float_nulls_to_ints(self):
         # ARROW-2135
-        df = pd.DataFrame({"a": [1.0, 2.0, np.NaN]})
+        df = pd.DataFrame({"a": [1.0, 2.0, np.nan]})
         schema = pa.schema([pa.field("a", pa.int16(), nullable=True)])
         table = pa.Table.from_pandas(df, schema=schema, safe=False)
         assert table[0].to_pylist() == [1, 2, None]
@@ -2329,7 +2337,7 @@ class TestConvertListTypes:
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore",
                                     "Creating an ndarray from ragged nested",
-                                    np.VisibleDeprecationWarning)
+                                    _np_VisibleDeprecationWarning)
             warnings.filterwarnings("ignore", "elementwise comparison failed",
                                     DeprecationWarning)
             tm.assert_series_equal(
@@ -2441,26 +2449,26 @@ class TestConvertListTypes:
         np_arr = chunked_arr.to_numpy()
 
         expected = np.array([[1., 2.], [3., 4., 5.], None,
-                            [6., np.NaN]], dtype="object")
+                            [6., np.nan]], dtype="object")
         for left, right in zip(np_arr, expected):
             if right is None:
                 assert left == right
             else:
                 npt.assert_array_equal(left, right)
 
-        expected_base = np.array([[1., 2., 3., 4., 5., 6., np.NaN]])
+        expected_base = np.array([[1., 2., 3., 4., 5., 6., np.nan]])
         npt.assert_array_equal(np_arr[0].base, expected_base)
 
         np_arr_sliced = chunked_arr.slice(1, 3).to_numpy()
 
-        expected = np.array([[3, 4, 5], None, [6, np.NaN]], dtype="object")
+        expected = np.array([[3, 4, 5], None, [6, np.nan]], dtype="object")
         for left, right in zip(np_arr_sliced, expected):
             if right is None:
                 assert left == right
             else:
                 npt.assert_array_equal(left, right)
 
-        expected_base = np.array([[3., 4., 5., 6., np.NaN]])
+        expected_base = np.array([[3., 4., 5., 6., np.nan]])
         npt.assert_array_equal(np_arr_sliced[0].base, expected_base)
 
     def test_list_values_behind_null(self):
@@ -2471,7 +2479,7 @@ class TestConvertListTypes:
         )
         np_arr = arr.to_numpy(zero_copy_only=False)
 
-        expected = np.array([[1., 2.], None, [3., np.NaN]], dtype="object")
+        expected = np.array([[1., 2.], None, [3., np.nan]], dtype="object")
         for left, right in zip(np_arr, expected):
             if right is None:
                 assert left == right
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 8a1dcfb057..1d8d77f50d 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -204,7 +204,9 @@ def test_numerics():
     # float16
     s = pa.scalar(np.float16(0.5), type='float16')
     assert isinstance(s, pa.HalfFloatScalar)
-    assert repr(s) == "<pyarrow.HalfFloatScalar: 0.5>"
+    # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
+    # on numpy1 repr(np.float16(0.5)) == "0.5"
+    assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
     assert str(s) == "0.5"
     assert s.as_py() == 0.5

[arrow] branch main updated: GH-37574: [Python] Compatibilty with numpy 2.0 (#38040)

Reply via email to