This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2e78adbe15 GH-35014: [Python] Make sure unit tests can run without
acero (#35017)
2e78adbe15 is described below
commit 2e78adbe15cd4c58dd30ccc4bf2d6d5e0eb2bf4d
Author: Will Jones <[email protected]>
AuthorDate: Wed Apr 12 08:53:08 2023 -0700
GH-35014: [Python] Make sure unit tests can run without acero (#35017)
### Rationale for this change
Fixes two failing nightlies:
* [example-python-minimal-build-fedora-conda](https://github.com/ursacomputing/crossbow/actions/runs/4653275808/jobs/8234009627)
* [example-python-minimal-build-ubuntu-venv](https://github.com/ursacomputing/crossbow/actions/runs/4653280100/jobs/8234017973)
### What changes are included in this PR?
Adds a pytest mark for Acero, since it is now required.
### Are these changes tested?
Yes, this fixes existing tests. Validated it works locally.
### Are there any user-facing changes?
No.
* Closes: #35014
Authored-by: Will Jones <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/acero.py | 8 +++++++-
python/pyarrow/conftest.py | 8 ++++++++
python/pyarrow/tests/parquet/test_dataset.py | 2 +-
python/pyarrow/tests/test_acero.py | 23 ++++++++++++++---------
python/pyarrow/tests/test_exec_plan.py | 9 +++++++--
python/pyarrow/tests/test_table.py | 11 ++++++-----
6 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py
index 00d30a3250..6a72ea1996 100644
--- a/python/pyarrow/acero.py
+++ b/python/pyarrow/acero.py
@@ -41,7 +41,13 @@ try:
import pyarrow.dataset as ds
from pyarrow._dataset import ScanNodeOptions
except ImportError:
- ds = None
+ class DatasetModuleStub:
+ class Dataset:
+ pass
+
+ class InMemoryDataset:
+ pass
+ ds = DatasetModuleStub
def _dataset_to_decl(dataset, use_threads=True):
diff --git a/python/pyarrow/conftest.py b/python/pyarrow/conftest.py
index 42866dbc75..ef09393cfb 100644
--- a/python/pyarrow/conftest.py
+++ b/python/pyarrow/conftest.py
@@ -21,6 +21,7 @@ from pyarrow import Codec
from pyarrow import fs
groups = [
+ 'acero',
'brotli',
'bz2',
'cython',
@@ -50,6 +51,7 @@ groups = [
]
defaults = {
+ 'acero': False,
'brotli': Codec.is_available('brotli'),
'bz2': Codec.is_available('bz2'),
'cython': False,
@@ -96,6 +98,12 @@ try:
except ImportError:
pass
+try:
+ import pyarrow.acero # noqa
+ defaults['acero'] = True
+except ImportError:
+ pass
+
try:
import pyarrow.dataset # noqa
defaults['dataset'] = True
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index fd24f1642f..d8b97afeb6 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1470,7 +1470,7 @@ def test_write_to_dataset_with_partitions_and_custom_filenames(
partition_by, partition_filename_callback,
use_legacy_dataset=use_legacy_dataset)
- dataset = pq.ParquetDataset(path)
+ dataset = pq.ParquetDataset(path, use_legacy_dataset=use_legacy_dataset)
# ARROW-3538: Ensure partition filenames match the given pattern
# defined in the local function partition_filename_callback
diff --git a/python/pyarrow/tests/test_acero.py b/python/pyarrow/tests/test_acero.py
index 7db4afd000..988e9b6e31 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -21,15 +21,18 @@ import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.compute import field
-from pyarrow.acero import (
- Declaration,
- TableSourceNodeOptions,
- FilterNodeOptions,
- ProjectNodeOptions,
- AggregateNodeOptions,
- OrderByNodeOptions,
- HashJoinNodeOptions,
-)
+try:
+ from pyarrow.acero import (
+ Declaration,
+ TableSourceNodeOptions,
+ FilterNodeOptions,
+ ProjectNodeOptions,
+ AggregateNodeOptions,
+ OrderByNodeOptions,
+ HashJoinNodeOptions,
+ )
+except ImportError:
+ pass
try:
import pyarrow.dataset as ds
@@ -37,6 +40,8 @@ try:
except ImportError:
ds = None
+pytestmark = pytest.mark.acero
+
@pytest.fixture
def table_source():
diff --git a/python/pyarrow/tests/test_exec_plan.py b/python/pyarrow/tests/test_exec_plan.py
index 599b753306..0fc3d4ec75 100644
--- a/python/pyarrow/tests/test_exec_plan.py
+++ b/python/pyarrow/tests/test_exec_plan.py
@@ -22,11 +22,15 @@ from .test_extension_type import IntegerType
try:
import pyarrow.dataset as ds
+except ImportError:
+ pass
+
+try:
from pyarrow.acero import _perform_join, _filter_table
except ImportError:
pass
-pytestmark = pytest.mark.dataset
+pytestmark = pytest.mark.acero
def test_joins_corner_cases():
@@ -89,7 +93,8 @@ def test_joins_corner_cases():
})
])
@pytest.mark.parametrize("use_threads", [True, False])
-@pytest.mark.parametrize("use_datasets", [False, True])
+@pytest.mark.parametrize("use_datasets",
+ [False, pytest.param(True, marks=pytest.mark.dataset)])
def test_joins(jointype, expected, use_threads, use_datasets):
# Allocate table here instead of using parametrize
# this prevents having arrow allocated memory forever around.
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 1a0eaa3377..1945afe3bb 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -1989,6 +1989,7 @@ def test_table_select():
assert result.equals(expected)
+@pytest.mark.acero
def test_table_group_by():
def sorted_by_keys(d):
# Ensure a guaranteed order of keys for aggregation results.
@@ -2156,7 +2157,7 @@ def test_table_to_recordbatchreader():
assert reader.read_next_batch().num_rows == 1
-@pytest.mark.dataset
+@pytest.mark.acero
def test_table_join():
t1 = pa.table({
"colA": [1, 2, 6],
@@ -2183,7 +2184,7 @@ def test_table_join():
})
-@pytest.mark.dataset
+@pytest.mark.acero
def test_table_join_unique_key():
t1 = pa.table({
"colA": [1, 2, 6],
@@ -2210,7 +2211,7 @@ def test_table_join_unique_key():
})
-@pytest.mark.dataset
+@pytest.mark.acero
def test_table_join_collisions():
t1 = pa.table({
"colA": [1, 2, 6],
@@ -2234,7 +2235,7 @@ def test_table_join_collisions():
], names=["colA", "colB", "colVals", "colB", "colVals"])
-@pytest.mark.dataset
+@pytest.mark.acero
def test_table_filter_expression():
t1 = pa.table({
"colA": [1, 2, 6],
@@ -2258,7 +2259,7 @@ def test_table_filter_expression():
})
-@pytest.mark.dataset
+@pytest.mark.acero
def test_table_join_many_columns():
t1 = pa.table({
"colA": [1, 2, 6],