[arrow] branch main updated: GH-35014: [Python] Make sure unit tests can run without acero (#35017)

jorisvandenbossche Wed, 12 Apr 2023 08:53:22 -0700

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 2e78adbe15 GH-35014: [Python] Make sure unit tests can run without acero (#35017)
2e78adbe15 is described below

commit 2e78adbe15cd4c58dd30ccc4bf2d6d5e0eb2bf4d
Author: Will Jones <[email protected]>
AuthorDate: Wed Apr 12 08:53:08 2023 -0700

    GH-35014: [Python] Make sure unit tests can run without acero (#35017)
    
    ### Rationale for this change
    
    Fixes two failing nightlies:
    
     * [example-python-minimal-build-fedora-conda](https://github.com/ursacomputing/crossbow/actions/runs/4653275808/jobs/8234009627)
     * [example-python-minimal-build-ubuntu-venv](https://github.com/ursacomputing/crossbow/actions/runs/4653280100/jobs/8234017973)
    
    ### What changes are included in this PR?
    
    Adds a pytest mark for Acero, since it is now required.
    
    ### Are these changes tested?
    
    Yes, this fixes existing tests. Validated it works locally.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #35014
    
    Authored-by: Will Jones <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/pyarrow/acero.py                      |  8 +++++++-
 python/pyarrow/conftest.py                   |  8 ++++++++
 python/pyarrow/tests/parquet/test_dataset.py |  2 +-
 python/pyarrow/tests/test_acero.py           | 23 ++++++++++++++---------
 python/pyarrow/tests/test_exec_plan.py       |  9 +++++++--
 python/pyarrow/tests/test_table.py           | 11 ++++++-----
 6 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py
index 00d30a3250..6a72ea1996 100644
--- a/python/pyarrow/acero.py
+++ b/python/pyarrow/acero.py
@@ -41,7 +41,13 @@ try:
     import pyarrow.dataset as ds
     from pyarrow._dataset import ScanNodeOptions
 except ImportError:
-    ds = None
+    class DatasetModuleStub:
+        class Dataset:
+            pass
+
+        class InMemoryDataset:
+            pass
+    ds = DatasetModuleStub
 
 
 def _dataset_to_decl(dataset, use_threads=True):
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 42866dbc75..ef09393cfb 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -21,6 +21,7 @@ from pyarrow import Codec
 from pyarrow import fs
 
 groups = [
+    'acero',
     'brotli',
     'bz2',
     'cython',
@@ -50,6 +51,7 @@ groups = [
 ]
 
 defaults = {
+    'acero': False,
     'brotli': Codec.is_available('brotli'),
     'bz2': Codec.is_available('bz2'),
     'cython': False,
@@ -96,6 +98,12 @@ try:
 except ImportError:
     pass
 
+try:
+    import pyarrow.acero  # noqa
+    defaults['acero'] = True
+except ImportError:
+    pass
+
 try:
     import pyarrow.dataset  # noqa
     defaults['dataset'] = True
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index fd24f1642f..d8b97afeb6 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1470,7 +1470,7 @@ def test_write_to_dataset_with_partitions_and_custom_filenames(
                         partition_by, partition_filename_callback,
                         use_legacy_dataset=use_legacy_dataset)
 
-    dataset = pq.ParquetDataset(path)
+    dataset = pq.ParquetDataset(path, use_legacy_dataset=use_legacy_dataset)
 
     # ARROW-3538: Ensure partition filenames match the given pattern
     # defined in the local function partition_filename_callback
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index 7db4afd000..988e9b6e31 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -21,15 +21,18 @@ import pyarrow as pa
 import pyarrow.compute as pc
 from pyarrow.compute import field
 
-from pyarrow.acero import (
-    Declaration,
-    TableSourceNodeOptions,
-    FilterNodeOptions,
-    ProjectNodeOptions,
-    AggregateNodeOptions,
-    OrderByNodeOptions,
-    HashJoinNodeOptions,
-)
+try:
+    from pyarrow.acero import (
+        Declaration,
+        TableSourceNodeOptions,
+        FilterNodeOptions,
+        ProjectNodeOptions,
+        AggregateNodeOptions,
+        OrderByNodeOptions,
+        HashJoinNodeOptions,
+    )
+except ImportError:
+    pass
 
 try:
     import pyarrow.dataset as ds
@@ -37,6 +40,8 @@ try:
 except ImportError:
     ds = None
 
+pytestmark = pytest.mark.acero
+
 
 @pytest.fixture
 def table_source():
diff --git a/python/pyarrow/tests/test_exec_plan.py b/python/pyarrow/tests/test_exec_plan.py
index 599b753306..0fc3d4ec75 100644
--- a/python/pyarrow/tests/test_exec_plan.py
+++ b/python/pyarrow/tests/test_exec_plan.py
@@ -22,11 +22,15 @@ from .test_extension_type import IntegerType
 
 try:
     import pyarrow.dataset as ds
+except ImportError:
+    pass
+
+try:
     from pyarrow.acero import _perform_join, _filter_table
 except ImportError:
     pass
 
-pytestmark = pytest.mark.dataset
+pytestmark = pytest.mark.acero
 
 
 def test_joins_corner_cases():
@@ -89,7 +93,8 @@ def test_joins_corner_cases():
     })
 ])
 @pytest.mark.parametrize("use_threads", [True, False])
-@pytest.mark.parametrize("use_datasets", [False, True])
+@pytest.mark.parametrize("use_datasets",
+                         [False, pytest.param(True, marks=pytest.mark.dataset)])
 def test_joins(jointype, expected, use_threads, use_datasets):
     # Allocate table here instead of using parametrize
     # this prevents having arrow allocated memory forever around.
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 1a0eaa3377..1945afe3bb 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1989,6 +1989,7 @@ def test_table_select():
     assert result.equals(expected)
 
 
+@pytest.mark.acero
 def test_table_group_by():
     def sorted_by_keys(d):
         # Ensure a guaranteed order of keys for aggregation results.
@@ -2156,7 +2157,7 @@ def test_table_to_recordbatchreader():
     assert reader.read_next_batch().num_rows == 1
 
 
-@pytest.mark.dataset
+@pytest.mark.acero
 def test_table_join():
     t1 = pa.table({
         "colA": [1, 2, 6],
@@ -2183,7 +2184,7 @@ def test_table_join():
     })
 
 
-@pytest.mark.dataset
+@pytest.mark.acero
 def test_table_join_unique_key():
     t1 = pa.table({
         "colA": [1, 2, 6],
@@ -2210,7 +2211,7 @@ def test_table_join_unique_key():
     })
 
 
-@pytest.mark.dataset
+@pytest.mark.acero
 def test_table_join_collisions():
     t1 = pa.table({
         "colA": [1, 2, 6],
@@ -2234,7 +2235,7 @@ def test_table_join_collisions():
     ], names=["colA", "colB", "colVals", "colB", "colVals"])
 
 
-@pytest.mark.dataset
+@pytest.mark.acero
 def test_table_filter_expression():
     t1 = pa.table({
         "colA": [1, 2, 6],
@@ -2258,7 +2259,7 @@ def test_table_filter_expression():
     })
 
 
-@pytest.mark.dataset
+@pytest.mark.acero
 def test_table_join_many_columns():
     t1 = pa.table({
         "colA": [1, 2, 6],

[arrow] branch main updated: GH-35014: [Python] Make sure unit tests can run without acero (#35017)

Reply via email to