[arrow] branch master updated: ARROW-10281: [Python] Fix warnings when running tests

kszucs Mon, 12 Oct 2020 10:51:52 -0700

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new e6eb61f  ARROW-10281: [Python] Fix warnings when running tests
e6eb61f is described below

commit e6eb61f58ef382003c9462924563f575d9a59c13
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Oct 12 19:50:48 2020 +0200

    ARROW-10281: [Python] Fix warnings when running tests
    
    Closes #8439 from jorisvandenbossche/ARROW-10281-test-warnings
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Krisztián Szűcs <[email protected]>
---
 python/pyarrow/tests/test_adhoc_memory_leak.py |  2 +-
 python/pyarrow/tests/test_convert_builtin.py   |  2 +-
 python/pyarrow/tests/test_dataset.py           |  4 ++--
 python/pyarrow/tests/test_io.py                |  2 +-
 python/pyarrow/tests/test_ipc.py               |  1 +
 python/pyarrow/tests/test_pandas.py            | 10 +++++++---
 python/pyarrow/tests/test_parquet.py           |  1 +
 python/pyarrow/tests/test_plasma.py            |  6 +++++-
 python/scripts/test_leak.py                    |  4 ++--
 9 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
index d95444d..cd381cf 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -32,7 +32,7 @@ except ImportError:
 @pytest.mark.pandas
 def test_deserialize_pandas_arrow_7956():
     df = pd.DataFrame({'a': np.arange(10000),
-                       'b': [pd.util.testing.rands(5) for _ in range(10000)]})
+                       'b': [test_util.rands(5) for _ in range(10000)]})
 
     def action():
         df_bytes = pa.ipc.serialize_pandas(df).to_pybytes()
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 91624d7..cb6b4b3 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -569,7 +569,7 @@ def test_ndarray_nested_numpy_double(from_pandas, inner_seq):
         inner_seq([1., 2., 3.]),
         inner_seq([np.nan]),
         None
-    ])
+    ], dtype=object)
     arr = pa.array(data, from_pandas=from_pandas)
     assert len(arr) == 4
     assert arr.null_count == 1
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 8c7ebc0..d2953e6 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -169,9 +169,9 @@ def multisourcefs(request):
         with mockfs.open_output_stream(path) as out:
             pq.write_table(_table_from_pandas(chunk), out)
 
-    # create one with schema partitioning by week and color
+    # create one with schema partitioning by weekday and color
     mockfs.create_dir('schema')
-    for part, chunk in df_b.groupby([df_b.date.dt.week, df_b.color]):
+    for part, chunk in df_b.groupby([df_b.date.dt.dayofweek, df_b.color]):
         folder = 'schema/{}/{}'.format(*part)
         path = '{}/chunk.parquet'.format(folder)
         mockfs.create_dir(folder)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 7d3237a..69dc135 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -573,7 +573,7 @@ def test_compress_decompress(compression):
     INPUT_SIZE = 10000
     test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                  .astype(np.uint8)
-                 .tostring())
+                 .tobytes())
     test_buf = pa.py_buffer(test_data)
 
     compressed_buf = pa.compress(test_buf, codec=compression)
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 3d3e72e..62bb1e7 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -765,6 +765,7 @@ def test_serialize_pandas_no_preserve_index():
     assert_frame_equal(result, df)
 
 
+@pytest.mark.filterwarnings("ignore:'pyarrow:DeprecationWarning")
 def test_serialize_with_pandas_objects():
     df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
     s = pd.Series([1, 2, 3, 4])
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 54f4574..f46e75f 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1793,7 +1793,7 @@ class TestConvertListTypes:
             np.arange(5, dtype=dtype),
             None,
             np.arange(1, dtype=dtype)
-        ])
+        ], dtype=object)
         type_ = pa.list_(pa.int8())
         parr = pa.array(arr, type=type_)
 
@@ -2090,7 +2090,7 @@ class TestConvertListTypes:
                       type=pa.large_list(pa.large_list(pa.int64())))
              .to_pandas())
         tm.assert_series_equal(
-            s, pd.Series([[[1, 2, 3], [4]], None]),
+            s, pd.Series([[[1, 2, 3], [4]], None], dtype=object),
             check_names=False)
 
     def test_large_binary_list(self):
@@ -2717,7 +2717,11 @@ class TestConvertMisc:
 
     def test_error_sparse(self):
         # ARROW-2818
-        df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
+        try:
+            df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])})
+        except AttributeError:
+            # pandas.arrays module introduced in pandas 0.24
+            df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
         with pytest.raises(TypeError, match="Sparse pandas data"):
             pa.Table.from_pandas(df)
 
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 34528b6..37e1043 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -4195,6 +4195,7 @@ def test_filter_before_validate_schema(tempdir, use_legacy_dataset):
 @pytest.mark.pandas
 @pytest.mark.fastparquet
 @pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
+@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
 def test_fastparquet_cross_compatibility(tempdir):
     fp = pytest.importorskip('fastparquet')
 
diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py
index c574735..3c00c29 100644
--- a/python/pyarrow/tests/test_plasma.py
+++ b/python/pyarrow/tests/test_plasma.py
@@ -307,6 +307,8 @@ class TestPlasmaClient:
             [result] = self.plasma_client.get([object_id], timeout_ms=0)
             assert result == pa.plasma.ObjectNotAvailable
 
+    @pytest.mark.filterwarnings(
+        "ignore:'pyarrow.deserialize':DeprecationWarning")
     def test_put_and_get_raw_buffer(self):
         temp_id = random_object_id()
         use_meta = b"RAW"
@@ -338,6 +340,8 @@ class TestPlasmaClient:
             result = deserialize_or_output(result)
             assert result == pa.plasma.ObjectNotAvailable
 
+    @pytest.mark.filterwarnings(
+        "ignore:'serialization_context':DeprecationWarning")
     def test_put_and_get_serialization_context(self):
 
         class CustomType:
@@ -349,7 +353,7 @@ class TestPlasmaClient:
         with pytest.raises(pa.ArrowSerializationError):
             self.plasma_client.put(val)
 
-        serialization_context = pa.SerializationContext()
+        serialization_context = pa.lib.SerializationContext()
         serialization_context.register_type(CustomType, 20*"\x00")
 
         object_id = self.plasma_client.put(
diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
index 83aac13..f2bbe8d 100644
--- a/python/scripts/test_leak.py
+++ b/python/scripts/test_leak.py
@@ -20,7 +20,7 @@
 import pyarrow as pa
 import numpy as np
 import pandas as pd
-import pandas.util.testing as tm
+from pyarrow.tests.util import rands
 import memory_profiler
 import gc
 import io
@@ -85,7 +85,7 @@ def test_leak3():
                        for i in range(50)})
     table = pa.Table.from_pandas(df, preserve_index=False)
 
-    writer = pq.ParquetWriter('leak_test_' + tm.rands(5) + '.parquet',
+    writer = pq.ParquetWriter('leak_test_' + rands(5) + '.parquet',
                               table.schema)
 
     def func():

[arrow] branch master updated: ARROW-10281: [Python] Fix warnings when running tests

Reply via email to