This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e6eb61f ARROW-10281: [Python] Fix warnings when running tests
e6eb61f is described below
commit e6eb61f58ef382003c9462924563f575d9a59c13
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Oct 12 19:50:48 2020 +0200
ARROW-10281: [Python] Fix warnings when running tests
Closes #8439 from jorisvandenbossche/ARROW-10281-test-warnings
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Krisztián Szűcs <[email protected]>
---
python/pyarrow/tests/test_adhoc_memory_leak.py | 2 +-
python/pyarrow/tests/test_convert_builtin.py | 2 +-
python/pyarrow/tests/test_dataset.py | 4 ++--
python/pyarrow/tests/test_io.py | 2 +-
python/pyarrow/tests/test_ipc.py | 1 +
python/pyarrow/tests/test_pandas.py | 10 +++++++---
python/pyarrow/tests/test_parquet.py | 1 +
python/pyarrow/tests/test_plasma.py | 6 +++++-
python/scripts/test_leak.py | 4 ++--
9 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py
b/python/pyarrow/tests/test_adhoc_memory_leak.py
index d95444d..cd381cf 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -32,7 +32,7 @@ except ImportError:
@pytest.mark.pandas
def test_deserialize_pandas_arrow_7956():
df = pd.DataFrame({'a': np.arange(10000),
- 'b': [pd.util.testing.rands(5) for _ in range(10000)]})
+ 'b': [test_util.rands(5) for _ in range(10000)]})
def action():
df_bytes = pa.ipc.serialize_pandas(df).to_pybytes()
diff --git a/python/pyarrow/tests/test_convert_builtin.py
b/python/pyarrow/tests/test_convert_builtin.py
index 91624d7..cb6b4b3 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -569,7 +569,7 @@ def test_ndarray_nested_numpy_double(from_pandas,
inner_seq):
inner_seq([1., 2., 3.]),
inner_seq([np.nan]),
None
- ])
+ ], dtype=object)
arr = pa.array(data, from_pandas=from_pandas)
assert len(arr) == 4
assert arr.null_count == 1
diff --git a/python/pyarrow/tests/test_dataset.py
b/python/pyarrow/tests/test_dataset.py
index 8c7ebc0..d2953e6 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -169,9 +169,9 @@ def multisourcefs(request):
with mockfs.open_output_stream(path) as out:
pq.write_table(_table_from_pandas(chunk), out)
- # create one with schema partitioning by week and color
+ # create one with schema partitioning by weekday and color
mockfs.create_dir('schema')
- for part, chunk in df_b.groupby([df_b.date.dt.week, df_b.color]):
+ for part, chunk in df_b.groupby([df_b.date.dt.dayofweek, df_b.color]):
folder = 'schema/{}/{}'.format(*part)
path = '{}/chunk.parquet'.format(folder)
mockfs.create_dir(folder)
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 7d3237a..69dc135 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -573,7 +573,7 @@ def test_compress_decompress(compression):
INPUT_SIZE = 10000
test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
.astype(np.uint8)
- .tostring())
+ .tobytes())
test_buf = pa.py_buffer(test_data)
compressed_buf = pa.compress(test_buf, codec=compression)
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 3d3e72e..62bb1e7 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -765,6 +765,7 @@ def test_serialize_pandas_no_preserve_index():
assert_frame_equal(result, df)
+@pytest.mark.filterwarnings("ignore:'pyarrow:DeprecationWarning")
def test_serialize_with_pandas_objects():
df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
s = pd.Series([1, 2, 3, 4])
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index 54f4574..f46e75f 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1793,7 +1793,7 @@ class TestConvertListTypes:
np.arange(5, dtype=dtype),
None,
np.arange(1, dtype=dtype)
- ])
+ ], dtype=object)
type_ = pa.list_(pa.int8())
parr = pa.array(arr, type=type_)
@@ -2090,7 +2090,7 @@ class TestConvertListTypes:
type=pa.large_list(pa.large_list(pa.int64())))
.to_pandas())
tm.assert_series_equal(
- s, pd.Series([[[1, 2, 3], [4]], None]),
+ s, pd.Series([[[1, 2, 3], [4]], None], dtype=object),
check_names=False)
def test_large_binary_list(self):
@@ -2717,7 +2717,11 @@ class TestConvertMisc:
def test_error_sparse(self):
# ARROW-2818
- df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
+ try:
+ df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])})
+ except AttributeError:
+ # pandas.arrays module introduced in pandas 0.24
+ df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
with pytest.raises(TypeError, match="Sparse pandas data"):
pa.Table.from_pandas(df)
diff --git a/python/pyarrow/tests/test_parquet.py
b/python/pyarrow/tests/test_parquet.py
index 34528b6..37e1043 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -4195,6 +4195,7 @@ def test_filter_before_validate_schema(tempdir,
use_legacy_dataset):
@pytest.mark.pandas
@pytest.mark.fastparquet
@pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
+@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
def test_fastparquet_cross_compatibility(tempdir):
fp = pytest.importorskip('fastparquet')
diff --git a/python/pyarrow/tests/test_plasma.py
b/python/pyarrow/tests/test_plasma.py
index c574735..3c00c29 100644
--- a/python/pyarrow/tests/test_plasma.py
+++ b/python/pyarrow/tests/test_plasma.py
@@ -307,6 +307,8 @@ class TestPlasmaClient:
[result] = self.plasma_client.get([object_id], timeout_ms=0)
assert result == pa.plasma.ObjectNotAvailable
+ @pytest.mark.filterwarnings(
+ "ignore:'pyarrow.deserialize':DeprecationWarning")
def test_put_and_get_raw_buffer(self):
temp_id = random_object_id()
use_meta = b"RAW"
@@ -338,6 +340,8 @@ class TestPlasmaClient:
result = deserialize_or_output(result)
assert result == pa.plasma.ObjectNotAvailable
+ @pytest.mark.filterwarnings(
+ "ignore:'serialization_context':DeprecationWarning")
def test_put_and_get_serialization_context(self):
class CustomType:
@@ -349,7 +353,7 @@ class TestPlasmaClient:
with pytest.raises(pa.ArrowSerializationError):
self.plasma_client.put(val)
- serialization_context = pa.SerializationContext()
+ serialization_context = pa.lib.SerializationContext()
serialization_context.register_type(CustomType, 20*"\x00")
object_id = self.plasma_client.put(
diff --git a/python/scripts/test_leak.py b/python/scripts/test_leak.py
index 83aac13..f2bbe8d 100644
--- a/python/scripts/test_leak.py
+++ b/python/scripts/test_leak.py
@@ -20,7 +20,7 @@
import pyarrow as pa
import numpy as np
import pandas as pd
-import pandas.util.testing as tm
+from pyarrow.tests.util import rands
import memory_profiler
import gc
import io
@@ -85,7 +85,7 @@ def test_leak3():
for i in range(50)})
table = pa.Table.from_pandas(df, preserve_index=False)
- writer = pq.ParquetWriter('leak_test_' + tm.rands(5) + '.parquet',
+ writer = pq.ParquetWriter('leak_test_' + rands(5) + '.parquet',
table.schema)
def func():