This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 60ebe931a Enable up037 (#2739)
60ebe931a is described below
commit 60ebe931acefd358196ddfde5467bc4f8d26dfc1
Author: Alex Stephen <[email protected]>
AuthorDate: Wed Nov 12 21:37:23 2025 +0530
Enable up037 (#2739)
<!--
Thanks for opening a pull request!
-->
Part of #2700
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual GitHub issue ID. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
This enables linter rule UP037
## Are these changes tested?
`make lint` and `make test` should pass.
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
Co-authored-by: Kevin Liu <[email protected]>
---
pyiceberg/catalog/__init__.py | 14 ++++++------
pyiceberg/catalog/glue.py | 14 ++++++------
pyiceberg/schema.py | 2 +-
pyiceberg/table/__init__.py | 2 +-
pyiceberg/table/inspect.py | 44 +++++++++++++++++++-------------------
pyiceberg/table/update/schema.py | 2 +-
pyiceberg/table/update/snapshot.py | 4 ++--
ruff.toml | 1 -
8 files changed, 41 insertions(+), 42 deletions(-)
diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py
index 5b3906294..7e467cd6c 100644
--- a/pyiceberg/catalog/__init__.py
+++ b/pyiceberg/catalog/__init__.py
@@ -361,7 +361,7 @@ class Catalog(ABC):
def create_table(
self,
identifier: str | Identifier,
- schema: Schema | "pa.Schema",
+ schema: Schema | pa.Schema,
location: str | None = None,
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
sort_order: SortOrder = UNSORTED_SORT_ORDER,
@@ -388,7 +388,7 @@ class Catalog(ABC):
def create_table_transaction(
self,
identifier: str | Identifier,
- schema: Schema | "pa.Schema",
+ schema: Schema | pa.Schema,
location: str | None = None,
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
sort_order: SortOrder = UNSORTED_SORT_ORDER,
@@ -411,7 +411,7 @@ class Catalog(ABC):
def create_table_if_not_exists(
self,
identifier: str | Identifier,
- schema: Schema | "pa.Schema",
+ schema: Schema | pa.Schema,
location: str | None = None,
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
sort_order: SortOrder = UNSORTED_SORT_ORDER,
@@ -753,7 +753,7 @@ class Catalog(ABC):
@staticmethod
def _convert_schema_if_needed(
- schema: Schema | "pa.Schema", format_version: TableVersion =
TableProperties.DEFAULT_FORMAT_VERSION
+ schema: Schema | pa.Schema, format_version: TableVersion =
TableProperties.DEFAULT_FORMAT_VERSION
) -> Schema:
if isinstance(schema, Schema):
return schema
@@ -799,7 +799,7 @@ class Catalog(ABC):
Default implementation does nothing. Override in subclasses that need
cleanup.
"""
- def __enter__(self) -> "Catalog":
+ def __enter__(self) -> Catalog:
"""Enter the context manager.
Returns:
@@ -829,7 +829,7 @@ class MetastoreCatalog(Catalog, ABC):
def create_table_transaction(
self,
identifier: str | Identifier,
- schema: Schema | "pa.Schema",
+ schema: Schema | pa.Schema,
location: str | None = None,
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
sort_order: SortOrder = UNSORTED_SORT_ORDER,
@@ -869,7 +869,7 @@ class MetastoreCatalog(Catalog, ABC):
def _create_staged_table(
self,
identifier: str | Identifier,
- schema: Schema | "pa.Schema",
+ schema: Schema | pa.Schema,
location: str | None = None,
partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
sort_order: SortOrder = UNSORTED_SORT_ORDER,
diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
index f19cb6dec..2474f0e78 100644
--- a/pyiceberg/catalog/glue.py
+++ b/pyiceberg/catalog/glue.py
@@ -199,7 +199,7 @@ class _IcebergSchemaToGlueType(SchemaVisitor[str]):
def _to_columns(metadata: TableMetadata) -> List["ColumnTypeDef"]:
- results: Dict[str, "ColumnTypeDef"] = {}
+ results: Dict[str, ColumnTypeDef] = {}
def _append_to_results(field: NestedField, is_current: bool) -> None:
if field.name in results:
@@ -241,7 +241,7 @@ def _construct_table_input(
glue_table: Optional["TableTypeDef"] = None,
prev_metadata_location: str | None = None,
) -> "TableInputTypeDef":
- table_input: "TableInputTypeDef" = {
+ table_input: TableInputTypeDef = {
"Name": table_name,
"TableType": EXTERNAL_TABLE,
"Parameters": _construct_parameters(metadata_location, glue_table,
prev_metadata_location, properties),
@@ -258,7 +258,7 @@ def _construct_table_input(
def _construct_rename_table_input(to_table_name: str, glue_table:
"TableTypeDef") -> "TableInputTypeDef":
- rename_table_input: "TableInputTypeDef" = {"Name": to_table_name}
+ rename_table_input: TableInputTypeDef = {"Name": to_table_name}
# use the same Glue info to create the new table, pointing to the old
metadata
if not glue_table["TableType"]:
raise ValueError("Glue table type is missing, cannot rename table")
@@ -283,7 +283,7 @@ def _construct_rename_table_input(to_table_name: str,
glue_table: "TableTypeDef"
def _construct_database_input(database_name: str, properties: Properties) ->
"DatabaseInputTypeDef":
- database_input: "DatabaseInputTypeDef" = {"Name": database_name}
+ database_input: DatabaseInputTypeDef = {"Name": database_name}
parameters = {}
for k, v in properties.items():
if k == "Description":
@@ -506,7 +506,7 @@ class GlueCatalog(MetastoreCatalog):
table_identifier = table.name()
database_name, table_name =
self.identifier_to_database_and_table(table_identifier, NoSuchTableError)
- current_glue_table: "TableTypeDef" | None
+ current_glue_table: TableTypeDef | None
glue_table_version_id: str | None
current_table: Table | None
try:
@@ -718,7 +718,7 @@ class GlueCatalog(MetastoreCatalog):
NoSuchNamespaceError: If a namespace with the given name does not
exist, or the identifier is invalid.
"""
database_name = self.identifier_to_database(namespace,
NoSuchNamespaceError)
- table_list: List["TableTypeDef"] = []
+ table_list: List[TableTypeDef] = []
next_token: str | None = None
try:
while True:
@@ -746,7 +746,7 @@ class GlueCatalog(MetastoreCatalog):
if namespace:
return []
- database_list: List["DatabaseTypeDef"] = []
+ database_list: List[DatabaseTypeDef] = []
next_token: str | None = None
while True:
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
index f6e4684b9..7e9989250 100644
--- a/pyiceberg/schema.py
+++ b/pyiceberg/schema.py
@@ -186,7 +186,7 @@ class Schema(IcebergBaseModel):
"""Return the schema as a struct."""
return StructType(*self.fields)
- def as_arrow(self) -> "pa.Schema":
+ def as_arrow(self) -> pa.Schema:
"""Return the schema as an Arrow schema."""
from pyiceberg.io.pyarrow import schema_to_pyarrow
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index abc225d4c..42ea40980 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1555,7 +1555,7 @@ class Table:
return pl.scan_iceberg(self)
- def __datafusion_table_provider__(self) -> "IcebergDataFusionTable":
+ def __datafusion_table_provider__(self) -> IcebergDataFusionTable:
"""Return the DataFusion table provider PyCapsule interface.
To support DataFusion features such as push down filtering, this
function will return a PyCapsule
diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py
index c4591a40e..45dc735a1 100644
--- a/pyiceberg/table/inspect.py
+++ b/pyiceberg/table/inspect.py
@@ -60,7 +60,7 @@ class InspectTable:
else:
raise ValueError("Cannot get a snapshot as the table does not have
any.")
- def snapshots(self) -> "pa.Table":
+ def snapshots(self) -> pa.Table:
import pyarrow as pa
snapshots_schema = pa.schema(
@@ -98,7 +98,7 @@ class InspectTable:
schema=snapshots_schema,
)
- def entries(self, snapshot_id: int | None = None) -> "pa.Table":
+ def entries(self, snapshot_id: int | None = None) -> pa.Table:
import pyarrow as pa
from pyiceberg.io.pyarrow import schema_to_pyarrow
@@ -229,7 +229,7 @@ class InspectTable:
schema=entries_schema,
)
- def refs(self) -> "pa.Table":
+ def refs(self) -> pa.Table:
import pyarrow as pa
ref_schema = pa.schema(
@@ -264,7 +264,7 @@ class InspectTable:
snapshot_id: int | None = None,
row_filter: str | BooleanExpression = ALWAYS_TRUE,
case_sensitive: bool = True,
- ) -> "pa.Table":
+ ) -> pa.Table:
import pyarrow as pa
from pyiceberg.io.pyarrow import schema_to_pyarrow
@@ -368,7 +368,7 @@ class InspectTable:
else:
raise ValueError(f"Unknown DataFileContent ({file.content})")
- def _get_manifests_schema(self) -> "pa.Schema":
+ def _get_manifests_schema(self) -> pa.Schema:
import pyarrow as pa
partition_summary_schema = pa.struct(
@@ -398,14 +398,14 @@ class InspectTable:
)
return manifest_schema
- def _get_all_manifests_schema(self) -> "pa.Schema":
+ def _get_all_manifests_schema(self) -> pa.Schema:
import pyarrow as pa
all_manifests_schema = self._get_manifests_schema()
all_manifests_schema =
all_manifests_schema.append(pa.field("reference_snapshot_id", pa.int64(),
nullable=False))
return all_manifests_schema
- def _generate_manifests_table(self, snapshot: Snapshot | None,
is_all_manifests_table: bool = False) -> "pa.Table":
+ def _generate_manifests_table(self, snapshot: Snapshot | None,
is_all_manifests_table: bool = False) -> pa.Table:
import pyarrow as pa
def _partition_summaries_to_rows(
@@ -474,10 +474,10 @@ class InspectTable:
schema=self._get_all_manifests_schema() if is_all_manifests_table
else self._get_manifests_schema(),
)
- def manifests(self) -> "pa.Table":
+ def manifests(self) -> pa.Table:
return self._generate_manifests_table(self.tbl.current_snapshot())
- def metadata_log_entries(self) -> "pa.Table":
+ def metadata_log_entries(self) -> pa.Table:
import pyarrow as pa
from pyiceberg.table.snapshots import MetadataLogEntry
@@ -513,7 +513,7 @@ class InspectTable:
schema=table_schema,
)
- def history(self) -> "pa.Table":
+ def history(self) -> pa.Table:
import pyarrow as pa
history_schema = pa.schema(
@@ -546,7 +546,7 @@ class InspectTable:
def _get_files_from_manifest(
self, manifest_list: ManifestFile, data_file_filter:
Set[DataFileContent] | None = None
- ) -> "pa.Table":
+ ) -> pa.Table:
import pyarrow as pa
files: list[dict[str, Any]] = []
@@ -610,7 +610,7 @@ class InspectTable:
schema=self._get_files_schema(),
)
- def _get_files_schema(self) -> "pa.Schema":
+ def _get_files_schema(self) -> pa.Schema:
import pyarrow as pa
from pyiceberg.io.pyarrow import schema_to_pyarrow
@@ -663,7 +663,7 @@ class InspectTable:
)
return files_schema
- def _files(self, snapshot_id: int | None = None, data_file_filter:
Set[DataFileContent] | None = None) -> "pa.Table":
+ def _files(self, snapshot_id: int | None = None, data_file_filter:
Set[DataFileContent] | None = None) -> pa.Table:
import pyarrow as pa
if not snapshot_id and not self.tbl.metadata.current_snapshot():
@@ -680,16 +680,16 @@ class InspectTable:
)
return pa.concat_tables(results)
- def files(self, snapshot_id: int | None = None) -> "pa.Table":
+ def files(self, snapshot_id: int | None = None) -> pa.Table:
return self._files(snapshot_id)
- def data_files(self, snapshot_id: int | None = None) -> "pa.Table":
+ def data_files(self, snapshot_id: int | None = None) -> pa.Table:
return self._files(snapshot_id, {DataFileContent.DATA})
- def delete_files(self, snapshot_id: int | None = None) -> "pa.Table":
+ def delete_files(self, snapshot_id: int | None = None) -> pa.Table:
return self._files(snapshot_id, {DataFileContent.POSITION_DELETES,
DataFileContent.EQUALITY_DELETES})
- def all_manifests(self) -> "pa.Table":
+ def all_manifests(self) -> pa.Table:
import pyarrow as pa
snapshots = self.tbl.snapshots()
@@ -697,12 +697,12 @@ class InspectTable:
return pa.Table.from_pylist([],
schema=self._get_all_manifests_schema())
executor = ExecutorFactory.get_or_create()
- manifests_by_snapshots: Iterator["pa.Table"] = executor.map(
+ manifests_by_snapshots: Iterator[pa.Table] = executor.map(
lambda args: self._generate_manifests_table(*args), [(snapshot,
True) for snapshot in snapshots]
)
return pa.concat_tables(manifests_by_snapshots)
- def _all_files(self, data_file_filter: Set[DataFileContent] | None = None)
-> "pa.Table":
+ def _all_files(self, data_file_filter: Set[DataFileContent] | None = None)
-> pa.Table:
import pyarrow as pa
snapshots = self.tbl.snapshots()
@@ -720,11 +720,11 @@ class InspectTable:
return pa.concat_tables(file_lists)
- def all_files(self) -> "pa.Table":
+ def all_files(self) -> pa.Table:
return self._all_files()
- def all_data_files(self) -> "pa.Table":
+ def all_data_files(self) -> pa.Table:
return self._all_files({DataFileContent.DATA})
- def all_delete_files(self) -> "pa.Table":
+ def all_delete_files(self) -> pa.Table:
return self._all_files({DataFileContent.POSITION_DELETES,
DataFileContent.EQUALITY_DELETES})
diff --git a/pyiceberg/table/update/schema.py b/pyiceberg/table/update/schema.py
index f28e0aa2a..851d68386 100644
--- a/pyiceberg/table/update/schema.py
+++ b/pyiceberg/table/update/schema.py
@@ -145,7 +145,7 @@ class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
def union_by_name(
# TODO: Move TableProperties.DEFAULT_FORMAT_VERSION to separate file
and set that as format_version default.
self,
- new_schema: Schema | "pa.Schema",
+ new_schema: Schema | pa.Schema,
format_version: TableVersion = 2,
) -> UpdateSchema:
from pyiceberg.catalog import Catalog
diff --git a/pyiceberg/table/update/snapshot.py
b/pyiceberg/table/update/snapshot.py
index 191e4a9bf..d59afbf68 100644
--- a/pyiceberg/table/update/snapshot.py
+++ b/pyiceberg/table/update/snapshot.py
@@ -1012,7 +1012,7 @@ class
ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]):
return self
- def by_ids(self, snapshot_ids: List[int]) -> "ExpireSnapshots":
+ def by_ids(self, snapshot_ids: List[int]) -> ExpireSnapshots:
"""
Expire multiple snapshots by their IDs.
@@ -1027,7 +1027,7 @@ class
ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]):
self.by_id(snapshot_id)
return self
- def older_than(self, dt: datetime) -> "ExpireSnapshots":
+ def older_than(self, dt: datetime) -> ExpireSnapshots:
"""
Expire all unprotected snapshots with a timestamp older than a given
value.
diff --git a/ruff.toml b/ruff.toml
index efd14fea7..d439caf74 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -59,7 +59,6 @@ select = [
]
ignore = [
"E501",
- "UP037",
"UP035",
"UP006"
]