This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new cbea8243 Fix conftest dependency breaking nightly wheel build (#3078)
cbea8243 is described below
commit cbea824374141633b2244654ffd96c951c016595
Author: Kevin Liu <[email protected]>
AuthorDate: Sun Feb 22 17:28:32 2026 -0500
Fix conftest dependency breaking nightly wheel build (#3078)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
Context:
https://github.com/apache/iceberg-python/pull/2982#issuecomment-3909445878
Adding imports to `conftest.py` may break the nightly build pipeline.
(For example BigQuery in #2982).
This is because [nightly wheel build
tests](https://github.com/apache/iceberg-python/blob/95f6273b23524c6238aafb57fa06e693ef83d6ef/.github/workflows/pypi-build-artifacts.yml#L74-L75)
run in a narrower dependency set (`--only-group dev`), so new imports
could cause test collection to fail.
This PR inlines the imports in `conftest.py` and also includes a smoke
test in CI to catch this problem going forward.
## Are these changes tested?
Yes, the nightly build works again:
https://github.com/apache/iceberg-python/actions/runs/22285169782
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
.github/workflows/pypi-build-artifacts.yml | 2 +
.github/workflows/python-ci.yml | 23 ++++++++++
pyproject.toml | 1 +
tests/conftest.py | 71 +++++++++++++++++++++++++-----
uv.lock | 2 +
5 files changed, 87 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/pypi-build-artifacts.yml
b/.github/workflows/pypi-build-artifacts.yml
index da282c6b..230209a9 100644
--- a/.github/workflows/pypi-build-artifacts.yml
+++ b/.github/workflows/pypi-build-artifacts.yml
@@ -71,6 +71,8 @@ jobs:
# Ignore 32 bit architectures
CIBW_ARCHS: "auto64"
CIBW_PROJECT_REQUIRES_PYTHON: ">=3.10,<3.14"
+ # Keep these in sync with Python CI job `cibw-dev-env-smoke-test`
+ # in .github/workflows/python-ci.yml to catch import-time
regressions early.
CIBW_BEFORE_TEST: "uv sync --directory {project} --only-group dev
--no-install-project"
CIBW_TEST_COMMAND: "uv run --directory {project} pytest
tests/avro/test_decoder.py"
# Skip free-threaded (PEP 703) builds until we evaluate decoder_fast
support
diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index ddc71d17..fadce41b 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -200,3 +200,26 @@ jobs:
merge-multiple: true
- name: Generate coverage report (75%) # Coverage threshold should only
increase over time — never decrease it!
run: COVERAGE_FAIL_UNDER=75 make coverage-report
+
+ cibw-dev-env-smoke-test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ - uses: actions/setup-python@v6
+ with:
+ python-version: '3.12'
+ - name: Install UV
+ uses: astral-sh/setup-uv@v7
+ with:
+ enable-cache: true
+ # Why this exists:
+ # Catch import-time regressions (e.g., global conftest optional deps)
+ # in the same dev-only environment used by cibuildwheel wheel tests.
+ # Keep this in sync with wheel build test setup in
+ # .github/workflows/pypi-build-artifacts.yml:
+ # CIBW_BEFORE_TEST: uv sync --directory {project} --only-group dev
--no-install-project
+ # CIBW_TEST_COMMAND: uv run --directory {project} pytest
tests/avro/test_decoder.py
+ - name: Mirror wheel CIBW_BEFORE_TEST
+ run: uv sync --directory . --only-group dev --no-install-project
+ - name: Mirror wheel CIBW_TEST_COMMAND
+ run: uv run --directory . pytest tests/avro/test_decoder.py
diff --git a/pyproject.toml b/pyproject.toml
index 00d4a9c5..3c6624dc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -118,6 +118,7 @@ dev = [
"docutils!=0.21.post1",
"mypy-boto3-glue>=1.28.18",
"mypy-boto3-dynamodb>=1.28.18",
+ "google-cloud-bigquery>=3.33.0,<4",
"pyarrow-stubs>=20.0.0.20251107", # Remove when pyarrow >= 23.0.0
https://github.com/apache/arrow/pull/47609
"sqlalchemy>=2.0.18,<3",
]
diff --git a/tests/conftest.py b/tests/conftest.py
index 5c85f49a..cd839e50 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -49,14 +49,6 @@ from pydantic_core import to_json
from pytest_lazy_fixtures import lf
from pyiceberg.catalog import Catalog, load_catalog
-from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog
-from pyiceberg.catalog.dynamodb import DynamoDbCatalog
-from pyiceberg.catalog.glue import GlueCatalog
-from pyiceberg.catalog.hive import HiveCatalog
-from pyiceberg.catalog.memory import InMemoryCatalog
-from pyiceberg.catalog.noop import NoopCatalog
-from pyiceberg.catalog.rest import RestCatalog
-from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.expressions import BoundReference
from pyiceberg.io import (
ADLS_ACCOUNT_KEY,
@@ -2497,6 +2489,8 @@ def warehouse(tmp_path_factory: pytest.TempPathFactory)
-> Path:
@pytest.fixture
def table_v1(example_table_metadata_v1: dict[str, Any]) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata = TableMetadataV1(**example_table_metadata_v1)
return Table(
identifier=("database", "table"),
@@ -2509,6 +2503,8 @@ def table_v1(example_table_metadata_v1: dict[str, Any])
-> Table:
@pytest.fixture
def table_v2(example_table_metadata_v2: dict[str, Any]) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata = TableMetadataV2(**example_table_metadata_v2)
return Table(
identifier=("database", "table"),
@@ -2521,6 +2517,8 @@ def table_v2(example_table_metadata_v2: dict[str, Any])
-> Table:
@pytest.fixture
def table_v3(example_table_metadata_v3: dict[str, Any]) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata = TableMetadataV3(**example_table_metadata_v3)
return Table(
identifier=("database", "table"),
@@ -2535,6 +2533,8 @@ def table_v3(example_table_metadata_v3: dict[str, Any])
-> Table:
def table_v2_orc(example_table_metadata_v2: dict[str, Any]) -> Table:
import copy
+ from pyiceberg.catalog.noop import NoopCatalog
+
metadata_dict = copy.deepcopy(example_table_metadata_v2)
if not metadata_dict["properties"]:
metadata_dict["properties"] = {}
@@ -2553,6 +2553,8 @@ def table_v2_orc(example_table_metadata_v2: dict[str,
Any]) -> Table:
def table_v2_with_fixed_and_decimal_types(
table_metadata_v2_with_fixed_and_decimal_types: dict[str, Any],
) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata = TableMetadataV2(
**table_metadata_v2_with_fixed_and_decimal_types,
)
@@ -2567,6 +2569,8 @@ def table_v2_with_fixed_and_decimal_types(
@pytest.fixture
def
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots:
dict[str, Any]) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata =
TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
return Table(
identifier=("database", "table"),
@@ -2579,6 +2583,8 @@ def
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_s
@pytest.fixture
def table_v2_with_statistics(table_metadata_v2_with_statistics: dict[str,
Any]) -> Table:
+ from pyiceberg.catalog.noop import NoopCatalog
+
table_metadata = TableMetadataV2(**table_metadata_v2_with_statistics)
return Table(
identifier=("database", "table"),
@@ -3000,11 +3006,15 @@ def ray_session() -> Generator[Any, None, None]:
# Catalog fixtures
-def _create_memory_catalog(name: str, warehouse: Path) -> InMemoryCatalog:
+def _create_memory_catalog(name: str, warehouse: Path) -> Catalog:
+ from pyiceberg.catalog.memory import InMemoryCatalog
+
return InMemoryCatalog(name, warehouse=f"file://{warehouse}")
-def _create_sql_catalog(name: str, warehouse: Path) -> SqlCatalog:
+def _create_sql_catalog(name: str, warehouse: Path) -> Catalog:
+ from pyiceberg.catalog.sql import SqlCatalog
+
catalog = SqlCatalog(
name,
uri="sqlite:///:memory:",
@@ -3014,7 +3024,9 @@ def _create_sql_catalog(name: str, warehouse: Path) ->
SqlCatalog:
return catalog
-def _create_sql_without_rowcount_catalog(name: str, warehouse: Path) ->
SqlCatalog:
+def _create_sql_without_rowcount_catalog(name: str, warehouse: Path) ->
Catalog:
+ from pyiceberg.catalog.sql import SqlCatalog
+
props = {
"uri": f"sqlite:////{warehouse}/sql-catalog",
"warehouse": f"file://{warehouse}",
@@ -3152,48 +3164,83 @@ def test_table_properties() -> dict[str, str]:
def does_support_purge_table(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties, "supports_purge_table",
True)
+ from pyiceberg.catalog.hive import HiveCatalog
+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
return False
return True
def does_support_atomic_concurrent_updates(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties,
"supports_atomic_concurrent_updates", True)
+ from pyiceberg.catalog.hive import HiveCatalog
+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
return False
return True
def does_support_nested_namespaces(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.dynamodb import DynamoDbCatalog
+ from pyiceberg.catalog.glue import GlueCatalog
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties,
"supports_nested_namespaces", True)
- if isinstance(catalog, (HiveCatalog, NoopCatalog, GlueCatalog,
BigQueryMetastoreCatalog, DynamoDbCatalog)):
+ from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog
+ from pyiceberg.catalog.hive import HiveCatalog
+
+ if isinstance(catalog, (HiveCatalog, BigQueryMetastoreCatalog,
NoopCatalog, GlueCatalog, DynamoDbCatalog)):
return False
return True
def does_support_schema_evolution(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties,
"supports_schema_evolution", True)
+ from pyiceberg.catalog.hive import HiveCatalog
+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
return False
return True
def does_support_slash_in_identifier(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+ from pyiceberg.catalog.sql import SqlCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties,
"supports_slash_in_identifier", True)
+ from pyiceberg.catalog.hive import HiveCatalog
+
if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
return False
return True
def does_support_dot_in_identifier(catalog: Catalog) -> bool:
+ from pyiceberg.catalog.noop import NoopCatalog
+ from pyiceberg.catalog.rest import RestCatalog
+ from pyiceberg.catalog.sql import SqlCatalog
+
if isinstance(catalog, RestCatalog):
return property_as_bool(catalog.properties,
"supports_dot_in_identifier", True)
+ from pyiceberg.catalog.hive import HiveCatalog
+
if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
return False
return True
diff --git a/uv.lock b/uv.lock
index 46fbc800..01da16cf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -4539,6 +4539,7 @@ dev = [
{ name = "deptry" },
{ name = "docutils" },
{ name = "fastavro" },
+ { name = "google-cloud-bigquery" },
{ name = "moto", extra = ["server"] },
{ name = "mypy-boto3-dynamodb" },
{ name = "mypy-boto3-glue" },
@@ -4628,6 +4629,7 @@ dev = [
{ name = "deptry", specifier = ">=0.14,<0.25" },
{ name = "docutils", specifier = "!=0.21.post1" },
{ name = "fastavro", specifier = "==1.12.1" },
+ { name = "google-cloud-bigquery", specifier = ">=3.33.0,<4" },
{ name = "moto", extras = ["server"], specifier = ">=5.0.2,<6" },
{ name = "mypy-boto3-dynamodb", specifier = ">=1.28.18" },
{ name = "mypy-boto3-glue", specifier = ">=1.28.18" },