[arrow] branch main updated: GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)

kou Sun, 06 Aug 2023 20:38:30 -0700

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 2b36521e52 GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)
2b36521e52 is described below

commit 2b36521e52f61f6a68e58e7c8c2f7bf2ed805cdc
Author: Dane Pitkin <[email protected]>
AuthorDate: Sun Aug 6 23:38:14 2023 -0400

    GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)
    
    ### Rationale for this change
    
    Warnings are constantly being introduced into the pyarrow tests. Let's try enforcing them as errors in an effort to keep the codebase healthy.
    
    ### What changes are included in this PR?
    
    * Fixed existing warnings
    * Set warnings as errors in CI
    
    ### Are these changes tested?
    
    Yes, ran pytest locally and the tests completed without warnings.
    
    ### Are there any user-facing changes?
    
    No
    * Closes: #36642
    
    Authored-by: Dane Pitkin <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 dev/tasks/tasks.yml                          |  3 ++-
 docker-compose.yml                           |  1 +
 python/pyarrow/tests/parquet/conftest.py     | 15 +++++++++------
 python/pyarrow/tests/parquet/test_dataset.py |  2 +-
 python/pyarrow/tests/strategies.py           | 22 ++++++++++++----------
 python/pyarrow/tests/test_pandas.py          |  5 ++++-
 python/pyarrow/tests/test_tensor.py          |  7 +++++--
 7 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 73b793162d..941506b9c2 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -1253,6 +1253,7 @@ tasks:
     params:
       env:
         PYTHON: "{{ python_version }}"
+        PYTEST_ARGS: "-W error"
       image: conda-python
 {% endfor %}
 
@@ -1265,7 +1266,7 @@ tasks:
         HYPOTHESIS_PROFILE: ci
         PYARROW_TEST_HYPOTHESIS: ON
         # limit to execute hypothesis tests only
-        PYTEST_ARGS: "-m hypothesis"
+        PYTEST_ARGS: "-m hypothesis -W error"
       image: conda-python-pandas
 
   test-conda-python-3.10-substrait:
diff --git a/docker-compose.yml b/docker-compose.yml
index fe98a30d0b..3bf346ef94 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -835,6 +835,7 @@ services:
     shm_size: *shm-size
     environment:
       <<: [*common, *ccache, *sccache]
+      PYTEST_ARGS:  # inherit
     volumes: *conda-volumes
     command: &python-conda-command
       ["
diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py
index 1e75493cda..461c24af22 100644
--- a/python/pyarrow/tests/parquet/conftest.py
+++ b/python/pyarrow/tests/parquet/conftest.py
@@ -29,9 +29,10 @@ def datadir(base_datadir):
 def s3_bucket(s3_server):
     boto3 = pytest.importorskip('boto3')
     botocore = pytest.importorskip('botocore')
+    s3_bucket_name = 'test-s3fs'
 
     host, port, access_key, secret_key = s3_server['connection']
-    s3 = boto3.resource(
+    s3_client = boto3.client(
         's3',
         endpoint_url='http://{}:{}'.format(host, port),
         aws_access_key_id=access_key,
@@ -39,13 +40,15 @@ def s3_bucket(s3_server):
         config=botocore.client.Config(signature_version='s3v4'),
         region_name='us-east-1'
     )
-    bucket = s3.Bucket('test-s3fs')
+
     try:
-        bucket.create()
+        s3_client.create_bucket(Bucket=s3_bucket_name)
     except Exception:
-        # we get BucketAlreadyOwnedByYou error with fsspec handler
-        pass
-    return 'test-s3fs'
+        pass  # we get BucketAlreadyOwnedByYou error with fsspec handler
+    finally:
+        s3_client.close()
+
+    return s3_bucket_name
 
 
 @pytest.fixture
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index cd991617c9..3e6ff49265 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path,
         output_df[col] = output_df[col].astype('category')
 
     if schema:
-        expected_date_type = schema.field_by_name('date').type.to_pandas_dtype()
+        expected_date_type = schema.field('date').type.to_pandas_dtype()
         output_df["date"] = output_df["date"].astype(expected_date_type)
 
     tm.assert_frame_equal(output_df, input_df)
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 48f7e53817..bb88a4dcb7 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types):
 
 
 def dictionary_types(key_strategy=None, value_strategy=None):
-    key_strategy = key_strategy or signed_integer_types
-    value_strategy = value_strategy or st.one_of(
-        bool_type,
-        integer_types,
-        st.sampled_from([pa.float32(), pa.float64()]),
-        binary_type,
-        string_type,
-        fixed_size_binary_type,
-    )
+    if key_strategy is None:
+        key_strategy = signed_integer_types
+    if value_strategy is None:
+        value_strategy = st.one_of(
+            bool_type,
+            integer_types,
+            st.sampled_from([pa.float32(), pa.float64()]),
+            binary_type,
+            string_type,
+            fixed_size_binary_type,
+        )
     return st.builds(pa.dictionary, key_strategy, value_strategy)
 
 
@@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None):
     children = [draw(arrays(field.type, size=rows)) for field in schema]
     # TODO(kszucs): the names and schema arguments are not consistent with
     #               Table.from_array's arguments
-    return pa.RecordBatch.from_arrays(children, names=schema)
+    return pa.RecordBatch.from_arrays(children, schema=schema)
 
 
 @st.composite
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 8bdc7253a4..ef6ddd0993 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2913,7 +2913,10 @@ class TestConvertMisc:
                           'f4', 'f8']
 
         for type_name in numeric_dtypes:
-            cases.append(random_numbers.astype(type_name))
+            # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                cases.append(random_numbers.astype(type_name))
 
         # strings
         cases.append(np.array([random_ascii(10) for i in range(N * K)],
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index aee46bc936..3e6a4ca8ed 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -18,6 +18,7 @@
 import os
 import sys
 import pytest
+import warnings
 import weakref
 
 import numpy as np
@@ -82,8 +83,10 @@ def test_tensor_base_object():
 @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
 def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
     dtype = np.dtype(dtype_str)
-    data = (100 * np.random.randn(10, 4)).astype(dtype)
-
+    # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        data = (100 * np.random.randn(10, 4)).astype(dtype)
     tensor = pa.Tensor.from_numpy(data)
     assert tensor.type == arrow_type

[arrow] branch main updated: GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)

Reply via email to