This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 79a05574 Remove E501 from ruff ignore list (#2949)
79a05574 is described below
commit 79a05574b18e38c9298632437b6f20e93eca7f62
Author: Kevin Liu <[email protected]>
AuthorDate: Sun Jan 25 15:36:56 2026 -0500
Remove E501 from ruff ignore list (#2949)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual GitHub issue ID. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
Closes #2700 (last one!)
This removes the exception for E501 from Ruff.
https://docs.astral.sh/ruff/rules/line-too-long/
Refactor only
## Are these changes tested?
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/catalog/glue.py | 3 +-
pyiceberg/catalog/rest/auth.py | 6 ++-
pyiceberg/catalog/sql.py | 3 +-
pyiceberg/conversions.py | 5 +-
pyiceberg/expressions/__init__.py | 12 +++--
pyiceberg/expressions/visitors.py | 3 +-
pyiceberg/io/__init__.py | 3 +-
pyiceberg/io/pyarrow.py | 33 +++++++++----
pyiceberg/manifest.py | 3 +-
pyiceberg/schema.py | 27 ++++++++---
pyiceberg/table/__init__.py | 20 +++++---
pyiceberg/table/locations.py | 3 +-
pyiceberg/table/metadata.py | 3 +-
pyiceberg/table/snapshots.py | 6 ++-
pyiceberg/table/sorting.py | 3 +-
pyiceberg/table/update/__init__.py | 24 ++++++----
pyiceberg/table/update/snapshot.py | 3 +-
pyiceberg/utils/schema_conversion.py | 6 ++-
ruff.toml | 4 +-
tests/catalog/test_hive.py | 5 +-
tests/catalog/test_rest.py | 36 ++++++++++-----
tests/cli/test_console.py | 12 ++---
tests/conftest.py | 29 +++++++++---
tests/expressions/test_evaluator.py | 4 +-
tests/expressions/test_visitors.py | 20 ++++----
tests/integration/test_add_files.py | 12 ++---
tests/integration/test_inspect_table.py | 17 +++++--
tests/integration/test_partitioning_key.py | 26 +++++++----
tests/integration/test_reads.py | 3 +-
.../test_writes/test_partitioned_writes.py | 11 +++--
tests/io/test_fsspec.py | 49 ++++++++++++++++----
tests/io/test_pyarrow.py | 52 +++++++++++++--------
tests/io/test_pyarrow_visitor.py | 19 +++++---
tests/table/test_name_mapping.py | 16 ++++---
tests/table/test_partitioning.py | 13 ++++--
tests/table/test_refs.py | 6 +--
tests/table/test_snapshots.py | 54 ++++++++++++++--------
tests/table/test_sorting.py | 25 ++++++++--
tests/table/test_upsert.py | 5 +-
tests/test_conversions.py | 3 +-
tests/test_schema.py | 43 +++++++++++++----
tests/test_types.py | 12 ++---
tests/utils/test_manifest.py | 17 ++++---
43 files changed, 454 insertions(+), 205 deletions(-)
diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
index 7260b294..e55257af 100644
--- a/pyiceberg/catalog/glue.py
+++ b/pyiceberg/catalog/glue.py
@@ -406,7 +406,8 @@ class GlueCatalog(MetastoreCatalog):
raise NoSuchTableError(f"Table does not exist:
{database_name}.{table_name} (Glue table version {version_id})") from e
except self.glue.exceptions.ConcurrentModificationException as e:
raise CommitFailedException(
- f"Cannot commit {database_name}.{table_name} because Glue
detected concurrent update to table version {version_id}"
+ f"Cannot commit {database_name}.{table_name} because Glue
detected concurrent update "
+ f"to table version {version_id}"
) from e
def _get_glue_table(self, database_name: str, table_name: str) ->
"TableTypeDef":
diff --git a/pyiceberg/catalog/rest/auth.py b/pyiceberg/catalog/rest/auth.py
index 4f67ab90..72235d87 100644
--- a/pyiceberg/catalog/rest/auth.py
+++ b/pyiceberg/catalog/rest/auth.py
@@ -181,7 +181,8 @@ class OAuth2TokenProvider:
expires_in = result.get("expires_in", self.expires_in)
if expires_in is None:
raise ValueError(
- "The expiration time of the Token must be provided by the
Server in the Access Token Response in `expires_in` field, or by the PyIceberg
Client."
+ "The expiration time of the Token must be provided by the
Server in the Access Token Response "
+ "in `expires_in` field, or by the PyIceberg Client."
)
self._expires_at = time.monotonic() + expires_in - self.refresh_margin
@@ -249,8 +250,9 @@ class GoogleAuthManager(AuthManager):
class AuthManagerAdapter(AuthBase):
- """A `requests.auth.AuthBase` adapter that integrates an `AuthManager`
into a `requests.Session` to automatically attach the appropriate Authorization
header to every request.
+ """A `requests.auth.AuthBase` adapter for integrating an `AuthManager`
into a `requests.Session`.
+ This adapter automatically attaches the appropriate Authorization header
to every request.
This adapter is useful when working with `requests.Session.auth`
and allows reuse of authentication strategies defined by `AuthManager`.
This AuthManagerAdapter is only intended to be used against the REST
Catalog
diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py
index 2b6fa745..a2704c3d 100644
--- a/pyiceberg/catalog/sql.py
+++ b/pyiceberg/catalog/sql.py
@@ -109,7 +109,8 @@ class SqlCatalog(MetastoreCatalog):
And you can have as many levels as you want, but you need at least one.
The `SqlCatalog` honors the same convention.
In the `JDBCCatalog` implementation, a `TableIdentifier` is composed of an
optional `Namespace` and a table name.
- When a `Namespace` is present, the full name will be
`'ns1.ns2.ns3.table'`. A valid `TableIdentifier` could be `'name'` (no
namespace).
+ When a `Namespace` is present, the full name will be `'ns1.ns2.ns3.table'`.
+ A valid `TableIdentifier` could be `'name'` (no namespace).
The `SqlCatalog` has a different convention where a `TableIdentifier`
requires a `Namespace`.
"""
diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index 8739a1ab..d599b555 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -188,8 +188,9 @@ def to_bytes(
) -> bytes:
"""Convert a built-in python value to bytes.
- This conversion follows the serialization scheme for storing single values
as individual binary values defined in the Iceberg specification that
- can be found at
https://iceberg.apache.org/spec/#appendix-d-single-value-serialization
+ This conversion follows the serialization scheme for storing single values
as individual binary values
+ defined in the Iceberg specification that can be found at
+ https://iceberg.apache.org/spec/#appendix-d-single-value-serialization
Args:
primitive_type (PrimitiveType): An implementation of the PrimitiveType
base class.
diff --git a/pyiceberg/expressions/__init__.py
b/pyiceberg/expressions/__init__.py
index 14ee9328..3910a146 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -690,12 +690,14 @@ class SetPredicate(UnboundPredicate, ABC):
def __str__(self) -> str:
"""Return the string representation of the SetPredicate class."""
# Sort to make it deterministic
- return f"{str(self.__class__.__name__)}({str(self.term)}, {{{',
'.join(sorted([str(literal) for literal in self.literals]))}}})"
+ literals_str = ", ".join(sorted([str(literal) for literal in
self.literals]))
+ return f"{str(self.__class__.__name__)}({str(self.term)},
{{{literals_str}}})"
def __repr__(self) -> str:
"""Return the string representation of the SetPredicate class."""
# Sort to make it deterministic
- return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{',
'.join(sorted([repr(literal) for literal in self.literals]))}}})"
+ literals_repr = ", ".join(sorted([repr(literal) for literal in
self.literals]))
+ return f"{str(self.__class__.__name__)}({repr(self.term)},
{{{literals_repr}}})"
def __eq__(self, other: Any) -> bool:
"""Return the equality of two instances of the SetPredicate class."""
@@ -725,12 +727,14 @@ class BoundSetPredicate(BoundPredicate, ABC):
def __str__(self) -> str:
"""Return the string representation of the BoundSetPredicate class."""
# Sort to make it deterministic
- return f"{str(self.__class__.__name__)}({str(self.term)}, {{{',
'.join(sorted([str(literal) for literal in self.literals]))}}})"
+ literals_str = ", ".join(sorted([str(literal) for literal in
self.literals]))
+ return f"{str(self.__class__.__name__)}({str(self.term)},
{{{literals_str}}})"
def __repr__(self) -> str:
"""Return the string representation of the BoundSetPredicate class."""
# Sort to make it deterministic
- return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{',
'.join(sorted([repr(literal) for literal in self.literals]))}}})"
+ literals_repr = ", ".join(sorted([repr(literal) for literal in
self.literals]))
+ return f"{str(self.__class__.__name__)}({repr(self.term)},
{{{literals_repr}}})"
def __eq__(self, other: Any) -> bool:
"""Return the equality of two instances of the BoundSetPredicate
class."""
diff --git a/pyiceberg/expressions/visitors.py
b/pyiceberg/expressions/visitors.py
index e4ab3bef..0beb0f3d 100644
--- a/pyiceberg/expressions/visitors.py
+++ b/pyiceberg/expressions/visitors.py
@@ -139,7 +139,8 @@ def visit(obj: BooleanExpression, visitor:
BooleanExpressionVisitor[T]) -> T:
Args:
obj (BooleanExpression): An instance of a BooleanExpression.
- visitor (BooleanExpressionVisitor[T]): An instance of an
implementation of the generic BooleanExpressionVisitor base class.
+ visitor (BooleanExpressionVisitor[T]): An instance of an
implementation of the generic
+ BooleanExpressionVisitor base class.
Raises:
NotImplementedError: If attempting to visit an unsupported expression.
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 87d155a0..85bd402d 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -368,5 +368,6 @@ def load_file_io(properties: Properties = EMPTY_DICT,
location: str | None = Non
return PyArrowFileIO(properties)
except ModuleNotFoundError as e:
raise ModuleNotFoundError(
- 'Could not load a FileIO, please consider installing one: pip3
install "pyiceberg[pyarrow]", for more options refer to the docs.'
+ "Could not load a FileIO, please consider installing one: "
+ 'pip3 install "pyiceberg[pyarrow]", for more options refer to the
docs.'
) from e
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index d07510d4..2360cf20 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -248,7 +248,9 @@ class PyArrowLocalFileSystem(pyarrow.fs.LocalFileSystem):
class PyArrowFile(InputFile, OutputFile):
- """A combined InputFile and OutputFile implementation that uses a pyarrow
filesystem to generate pyarrow.lib.NativeFile instances.
+ """A combined InputFile and OutputFile implementation using pyarrow
filesystem.
+
+ This class generates pyarrow.lib.NativeFile instances.
Args:
location (str): A URI or a path to a local file.
@@ -645,8 +647,9 @@ class PyArrowFileIO(FileIO):
"""Delete the file at the given location.
Args:
- location (Union[str, InputFile, OutputFile]): The URI to the
file--if an InputFile instance or an OutputFile instance is provided,
- the location attribute for that instance is used as the
location to delete.
+ location (Union[str, InputFile, OutputFile]): The URI to the
file--if an InputFile instance or
+ an OutputFile instance is provided, the location attribute for
that instance is used as
+ the location to delete.
Raises:
FileNotFoundError: When the file at the provided location does not
exist.
@@ -1014,7 +1017,10 @@ class
_NullNaNUnmentionedTermsCollector(BoundBooleanExpressionVisitor[None]):
self,
expr: BooleanExpression,
) -> None:
- """Collect the bound references categorized by having at least one
is_null or is_not_null in the expr and the remaining."""
+ """Collect bound references categorized by null predicates.
+
+ Categorizes by having at least one is_null or is_not_null in the expr
and the remaining.
+ """
boolean_expression_visit(expr, self)
@@ -1035,7 +1041,8 @@ def expression_to_pyarrow(expr: BooleanExpression,
schema: Schema | None = None)
def _expression_to_complementary_pyarrow(expr: BooleanExpression, schema:
Schema | None = None) -> pc.Expression:
"""Complementary filter conversion function of expression_to_pyarrow.
- Could not use expression_to_pyarrow(Not(expr)) to achieve this
complementary effect because ~ in pyarrow.compute.Expression does not handle
null.
+ Could not use expression_to_pyarrow(Not(expr)) to achieve this
complementary effect because
+ ~ in pyarrow.compute.Expression does not handle null.
"""
collector = _NullNaNUnmentionedTermsCollector()
collector.collect(expr)
@@ -1417,7 +1424,9 @@ class _ConvertToIceberg(PyArrowSchemaVisitor[IcebergType
| Schema]):
return TimestampNanoType()
else:
raise TypeError(
- "Iceberg does not yet support 'ns' timestamp
precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to
automatically downcast 'ns' to 'us' on write.",
+ "Iceberg does not yet support 'ns' timestamp
precision. "
+ "Use 'downcast-ns-timestamp-to-us-on-write'
configuration property to automatically "
+ "downcast 'ns' to 'us' on write.",
)
else:
raise TypeError(f"Unsupported precision for timestamp type:
{primitive.unit}")
@@ -1580,7 +1589,8 @@ def _task_to_record_batches(
fragment = arrow_format.make_fragment(fin)
physical_schema = fragment.physical_schema
- # For V1 and V2, we only support Timestamp 'us' in Iceberg Schema,
therefore it is reasonable to always cast 'ns' timestamp to 'us' on read.
+ # For V1 and V2, we only support Timestamp 'us' in Iceberg Schema,
+ # therefore it is reasonable to always cast 'ns' timestamp to 'us' on
read.
# For V3 this has to set explicitly to avoid nanosecond timestamp to
be down-casted by default
downcast_ns_timestamp_to_us = (
downcast_ns_timestamp_to_us if downcast_ns_timestamp_to_us is not
None else format_version <= 2
@@ -2450,7 +2460,8 @@ class DataFileStatistics:
if not iceberg_transform.preserves_order:
raise ValueError(
- f"Cannot infer partition value from parquet metadata for a
non-linear Partition Field: {partition_field.name} with transform
{partition_field.transform}"
+ f"Cannot infer partition value from parquet metadata for a
non-linear Partition Field: "
+ f"{partition_field.name} with transform
{partition_field.transform}"
)
transform_func = iceberg_transform.transform(source_field.field_type)
@@ -2471,7 +2482,8 @@ class DataFileStatistics:
)
if lower_value != upper_value:
raise ValueError(
- f"Cannot infer partition value from parquet metadata as there
are more than one partition values for Partition Field: {partition_field.name}.
{lower_value=}, {upper_value=}"
+ f"Cannot infer partition value from parquet metadata as there
are more than one partition values "
+ f"for Partition Field: {partition_field.name}. {lower_value=},
{upper_value=}"
)
return lower_value
@@ -2738,7 +2750,8 @@ def _check_pyarrow_schema_compatible(
)
additional_names = set(provided_schema._name_to_id.keys()) -
set(requested_schema._name_to_id.keys())
raise ValueError(
- f"PyArrow table contains more columns: {',
'.join(sorted(additional_names))}. Update the schema first (hint, use
union_by_name)."
+ f"PyArrow table contains more columns: {',
'.join(sorted(additional_names))}. "
+ "Update the schema first (hint, use union_by_name)."
) from e
_check_schema_compatible(requested_schema, provided_schema)
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index 0afa1666..bd83075b 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -1319,7 +1319,8 @@ class ManifestListWriterV2(ManifestListWriter):
# To validate this, check that the snapshot id matches the current
commit
if self._commit_snapshot_id !=
wrapped_manifest_file.added_snapshot_id:
raise ValueError(
- f"Found unassigned sequence number for a manifest from
snapshot: {self._commit_snapshot_id} !=
{wrapped_manifest_file.added_snapshot_id}"
+ f"Found unassigned sequence number for a manifest from
snapshot: "
+ f"{self._commit_snapshot_id} !=
{wrapped_manifest_file.added_snapshot_id}"
)
wrapped_manifest_file.sequence_number = self._sequence_number
diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py
index 5896e7e1..94a7ba59 100644
--- a/pyiceberg/schema.py
+++ b/pyiceberg/schema.py
@@ -104,7 +104,8 @@ class Schema(IcebergBaseModel):
def __repr__(self) -> str:
"""Return the string representation of the Schema class."""
- return f"Schema({', '.join(repr(column) for column in self.columns)},
schema_id={self.schema_id}, identifier_field_ids={self.identifier_field_ids})"
+ columns_repr = ", ".join(repr(column) for column in self.columns)
+ return f"Schema({columns_repr}, schema_id={self.schema_id},
identifier_field_ids={self.identifier_field_ids})"
def __len__(self) -> int:
"""Return the length of an instance of the Literal class."""
@@ -374,7 +375,8 @@ class Schema(IcebergBaseModel):
for field in self._lazy_id_to_field.values():
if format_version < field.field_type.minimum_format_version():
raise ValueError(
- f"{field.field_type} is only supported in
{field.field_type.minimum_format_version()} or higher. Current format version
is: {format_version}"
+ f"{field.field_type} is only supported in
{field.field_type.minimum_format_version()} or higher. "
+ f"Current format version is: {format_version}"
)
@@ -1530,7 +1532,8 @@ class _PruneColumnsVisitor(SchemaVisitor[IcebergType |
None]):
else:
if not field.field_type.is_primitive:
raise ValueError(
- f"Cannot explicitly project List or Map types,
{field.field_id}:{field.name} of type {field.field_type} was selected"
+ f"Cannot explicitly project List or Map types, "
+ f"{field.field_id}:{field.name} of type
{field.field_type} was selected"
)
# Selected non-struct field
return field.field_type
@@ -1550,7 +1553,8 @@ class _PruneColumnsVisitor(SchemaVisitor[IcebergType |
None]):
else:
if not list_type.element_type.is_primitive:
raise ValueError(
- f"Cannot explicitly project List or Map types,
{list_type.element_id} of type {list_type.element_type} was selected"
+ f"Cannot explicitly project List or Map types, "
+ f"{list_type.element_id} of type
{list_type.element_type} was selected"
)
return list_type
elif element_result is not None:
@@ -1567,7 +1571,8 @@ class _PruneColumnsVisitor(SchemaVisitor[IcebergType |
None]):
return self._project_map(map_type, projected_struct)
if not map_type.value_type.is_primitive:
raise ValueError(
- f"Cannot explicitly project List or Map types, Map value
{map_type.value_id} of type {map_type.value_type} was selected"
+ f"Cannot explicitly project List or Map types, "
+ f"Map value {map_type.value_id} of type
{map_type.value_type} was selected"
)
return map_type
elif value_result is not None:
@@ -1764,9 +1769,17 @@ class
_SchemaCompatibilityVisitor(PreOrderSchemaVisitor[bool]):
# UnknownType can only be promoted to Primitive types
if isinstance(rhs.field_type, UnknownType):
if not isinstance(lhs.field_type, PrimitiveType):
- error_msg = f"Null type (UnknownType) cannot be
promoted to non-primitive type {lhs.field_type}. UnknownType can only be
promoted to primitive types (string, int, boolean, etc.) in V3+ tables."
+ error_msg = (
+ f"Null type (UnknownType) cannot be promoted to
non-primitive type {lhs.field_type}. "
+ "UnknownType can only be promoted to primitive
types (string, int, boolean, etc.) "
+ "in V3+ tables."
+ )
else:
- error_msg = f"Null type (UnknownType) cannot be
promoted to {lhs.field_type}. This may be due to table format version
limitations (V1/V2 tables don't support UnknownType promotion)."
+ error_msg = (
+ f"Null type (UnknownType) cannot be promoted to
{lhs.field_type}. "
+ "This may be due to table format version
limitations "
+ "(V1/V2 tables don't support UnknownType
promotion)."
+ )
self.rich_table.add_row("❌", str(lhs), f"{str(rhs)} -
{error_msg}")
else:
self.rich_table.add_row("❌", str(lhs), str(rhs))
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 6340ac57..cc0d9ff3 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -559,7 +559,8 @@ class Transaction:
for field in self.table_metadata.spec().fields:
if not isinstance(field.transform, IdentityTransform):
raise ValueError(
- f"For now dynamic overwrite does not support a table with
non-identity-transform field in the latest partition spec: {field}"
+ f"For now dynamic overwrite does not support a table with
non-identity-transform field "
+ f"in the latest partition spec: {field}"
)
downcast_ns_timestamp_to_us =
Config().get_bool(DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE) or False
@@ -768,8 +769,10 @@ class Transaction:
df: The input dataframe to upsert with the table's data.
join_cols: Columns to join on, if not provided, it will use the
identifier-field-ids.
- when_matched_update_all: Bool indicating to update rows that are
matched but require an update due to a value in a non-key column changing
- when_not_matched_insert_all: Bool indicating new rows to be
inserted that do not match any existing rows in the table
+ when_matched_update_all: Bool indicating to update rows that are
matched but require an update
+ due to a value in a non-key column changing
+ when_not_matched_insert_all: Bool indicating new rows to be
inserted that do not match any
+ existing rows in the table
case_sensitive: Bool indicating if the match should be
case-sensitive
branch: Branch Reference to run the upsert operation
snapshot_properties: Custom properties to be added to the snapshot
summary
@@ -859,8 +862,9 @@ class Transaction:
rows = pa.Table.from_batches([batch])
if when_matched_update_all:
- # function get_rows_to_update is doing a check on non-key
columns to see if any of the values have actually changed
- # we don't want to do just a blanket overwrite for matched
rows if the actual non-key column data hasn't changed
+ # function get_rows_to_update is doing a check on non-key
columns to see if any of the
+ # values have actually changed. We don't want to do just a
blanket overwrite for matched
+ # rows if the actual non-key column data hasn't changed.
# this extra step avoids unnecessary IO and writes
rows_to_update = upsert_util.get_rows_to_update(df, rows,
join_cols)
@@ -1367,8 +1371,10 @@ class Table:
df: The input dataframe to upsert with the table's data.
join_cols: Columns to join on, if not provided, it will use the
identifier-field-ids.
- when_matched_update_all: Bool indicating to update rows that are
matched but require an update due to a value in a non-key column changing
- when_not_matched_insert_all: Bool indicating new rows to be
inserted that do not match any existing rows in the table
+ when_matched_update_all: Bool indicating to update rows that are
matched but require an update
+ due to a value in a non-key column changing
+ when_not_matched_insert_all: Bool indicating new rows to be
inserted that do not match any
+ existing rows in the table
case_sensitive: Bool indicating if the match should be
case-sensitive
branch: Branch Reference to run the upsert operation
snapshot_properties: Custom properties to be added to the snapshot
summary
diff --git a/pyiceberg/table/locations.py b/pyiceberg/table/locations.py
index 3aeaaf22..771c6b5a 100644
--- a/pyiceberg/table/locations.py
+++ b/pyiceberg/table/locations.py
@@ -172,7 +172,8 @@ def _import_location_provider(
from pyiceberg.table import TableProperties
raise ValueError(
- f"{TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL} should be
full path (module.CustomLocationProvider), got: {location_provider_impl}"
+ f"{TableProperties.WRITE_PY_LOCATION_PROVIDER_IMPL} should be
full path "
+ f"(module.CustomLocationProvider), got:
{location_provider_impl}"
)
module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1]
module = importlib.import_module(module_name)
diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py
index 8a55f77b..07960ad7 100644
--- a/pyiceberg/table/metadata.py
+++ b/pyiceberg/table/metadata.py
@@ -679,7 +679,8 @@ class TableMetadataUtil:
def _construct_without_validation(table_metadata: TableMetadata) ->
TableMetadata:
"""Construct table metadata from an existing table without performing
validation.
- This method is useful during a sequence of table updates when the
model needs to be re-constructed but is not yet ready for validation.
+ This method is useful during a sequence of table updates when the
model needs to be
+ re-constructed but is not yet ready for validation.
"""
if table_metadata.format_version is None:
raise ValidationError(f"Missing format-version in TableMetadata:
{table_metadata}")
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index 797093f9..a64cf7bd 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -70,9 +70,11 @@ class Operation(Enum):
Possible operation values are:
- append: Only data files were added and no files were removed.
- - replace: Data and delete files were added and removed without
changing table data; i.e., compaction, changing the data file format, or
relocating data files.
+ - replace: Data and delete files were added and removed without
changing table data;
+ i.e., compaction, changing the data file format, or relocating
data files.
- overwrite: Data and delete files were added and removed in a logical
overwrite operation.
- - delete: Data files were removed and their contents logically deleted
and/or delete files were added to delete rows.
+ - delete: Data files were removed and their contents logically deleted
and/or delete files
+ were added to delete rows.
"""
APPEND = "append"
diff --git a/pyiceberg/table/sorting.py b/pyiceberg/table/sorting.py
index 5243d7b1..62fc5554 100644
--- a/pyiceberg/table/sorting.py
+++ b/pyiceberg/table/sorting.py
@@ -67,7 +67,8 @@ class SortField(IcebergBaseModel):
transform (str): Transform that is used to produce values to be sorted
on from the source column.
This is the same transform as described in partition
transforms.
direction (SortDirection): Sort direction, that can only be either asc
or desc.
- null_order (NullOrder): Null order that describes the order of null
values when sorted. Can only be either nulls-first or nulls-last.
+ null_order (NullOrder): Null order that describes the order of null
values when sorted.
+ Can only be either nulls-first or nulls-last.
"""
def __init__(
diff --git a/pyiceberg/table/update/__init__.py
b/pyiceberg/table/update/__init__.py
index 68e3d9f7..07032bd8 100644
--- a/pyiceberg/table/update/__init__.py
+++ b/pyiceberg/table/update/__init__.py
@@ -176,7 +176,8 @@ class SetStatisticsUpdate(IcebergBaseModel):
snapshot_id: int | None = Field(
None,
alias="snapshot-id",
- description="snapshot-id is **DEPRECATED for REMOVAL** since it
contains redundant information. Use `statistics.snapshot-id` field instead.",
+ description="snapshot-id is **DEPRECATED for REMOVAL** since it
contains redundant information. "
+ "Use `statistics.snapshot-id` field instead.",
)
@model_validator(mode="before")
@@ -450,7 +451,8 @@ def _(update: AddSnapshotUpdate, base_metadata:
TableMetadata, context: _TableMe
and update.snapshot.first_row_id < base_metadata.next_row_id
):
raise ValueError(
- f"Cannot add a snapshot with first row id smaller than the table's
next-row-id {update.snapshot.first_row_id} < {base_metadata.next_row_id}"
+ f"Cannot add a snapshot with first row id smaller than the table's
next-row-id "
+ f"{update.snapshot.first_row_id} < {base_metadata.next_row_id}"
)
context.add_update(update)
@@ -812,7 +814,8 @@ class AssertRefSnapshotId(ValidatableTableRequirement):
raise CommitFailedException(f"Requirement failed: {ref_type}
{self.ref} was created concurrently")
elif self.snapshot_id != snapshot_ref.snapshot_id:
raise CommitFailedException(
- f"Requirement failed: {ref_type} {self.ref} has changed:
expected id {self.snapshot_id}, found {snapshot_ref.snapshot_id}"
+ f"Requirement failed: {ref_type} {self.ref} has changed: "
+ f"expected id {self.snapshot_id}, found
{snapshot_ref.snapshot_id}"
)
elif self.snapshot_id is not None:
raise CommitFailedException(f"Requirement failed: branch or tag
{self.ref} is missing, expected {self.snapshot_id}")
@@ -829,7 +832,8 @@ class
AssertLastAssignedFieldId(ValidatableTableRequirement):
raise CommitFailedException("Requirement failed: current table
metadata is missing")
elif base_metadata.last_column_id != self.last_assigned_field_id:
raise CommitFailedException(
- f"Requirement failed: last assigned field id has changed:
expected {self.last_assigned_field_id}, found {base_metadata.last_column_id}"
+ f"Requirement failed: last assigned field id has changed: "
+ f"expected {self.last_assigned_field_id}, found
{base_metadata.last_column_id}"
)
@@ -844,7 +848,8 @@ class AssertCurrentSchemaId(ValidatableTableRequirement):
raise CommitFailedException("Requirement failed: current table
metadata is missing")
elif self.current_schema_id != base_metadata.current_schema_id:
raise CommitFailedException(
- f"Requirement failed: current schema id has changed: expected
{self.current_schema_id}, found {base_metadata.current_schema_id}"
+ f"Requirement failed: current schema id has changed: "
+ f"expected {self.current_schema_id}, found
{base_metadata.current_schema_id}"
)
@@ -859,7 +864,8 @@ class
AssertLastAssignedPartitionId(ValidatableTableRequirement):
raise CommitFailedException("Requirement failed: current table
metadata is missing")
elif base_metadata.last_partition_id !=
self.last_assigned_partition_id:
raise CommitFailedException(
- f"Requirement failed: last assigned partition id has changed:
expected {self.last_assigned_partition_id}, found
{base_metadata.last_partition_id}"
+ f"Requirement failed: last assigned partition id has changed: "
+ f"expected {self.last_assigned_partition_id}, found
{base_metadata.last_partition_id}"
)
@@ -874,7 +880,8 @@ class AssertDefaultSpecId(ValidatableTableRequirement):
raise CommitFailedException("Requirement failed: current table
metadata is missing")
elif self.default_spec_id != base_metadata.default_spec_id:
raise CommitFailedException(
- f"Requirement failed: default spec id has changed: expected
{self.default_spec_id}, found {base_metadata.default_spec_id}"
+ f"Requirement failed: default spec id has changed: "
+ f"expected {self.default_spec_id}, found
{base_metadata.default_spec_id}"
)
@@ -889,7 +896,8 @@ class AssertDefaultSortOrderId(ValidatableTableRequirement):
raise CommitFailedException("Requirement failed: current table
metadata is missing")
elif self.default_sort_order_id != base_metadata.default_sort_order_id:
raise CommitFailedException(
- f"Requirement failed: default sort order id has changed:
expected {self.default_sort_order_id}, found
{base_metadata.default_sort_order_id}"
+ f"Requirement failed: default sort order id has changed: "
+ f"expected {self.default_sort_order_id}, found
{base_metadata.default_sort_order_id}"
)
diff --git a/pyiceberg/table/update/snapshot.py
b/pyiceberg/table/update/snapshot.py
index 389299bf..c88337e7 100644
--- a/pyiceberg/table/update/snapshot.py
+++ b/pyiceberg/table/update/snapshot.py
@@ -433,7 +433,8 @@ class _DeleteFiles(_SnapshotProducer["_DeleteFiles"]):
def _copy_with_new_status(entry: ManifestEntry, status:
ManifestEntryStatus) -> ManifestEntry:
return ManifestEntry.from_args(
status=status,
- # When a file is replaced or deleted from the dataset, its
manifest entry fields store the snapshot ID in which the file was deleted and
status 2 (deleted).
+ # When a file is replaced or deleted from the dataset, its
manifest entry fields store the
+ # snapshot ID in which the file was deleted and status 2
(deleted).
snapshot_id=self.snapshot_id if status ==
ManifestEntryStatus.DELETED else entry.snapshot_id,
sequence_number=entry.sequence_number,
file_sequence_number=entry.file_sequence_number,
diff --git a/pyiceberg/utils/schema_conversion.py
b/pyiceberg/utils/schema_conversion.py
index 66e57d5d..f08f5395 100644
--- a/pyiceberg/utils/schema_conversion.py
+++ b/pyiceberg/utils/schema_conversion.py
@@ -107,10 +107,12 @@ class AvroSchemaConversion:
... })
>>> iceberg_schema = Schema(
... NestedField(
- ... field_id=500, name="manifest_path",
field_type=StringType(), required=False, doc="Location URI with FS scheme"
+ ... field_id=500, name="manifest_path",
field_type=StringType(),
+ ... required=False, doc="Location URI with FS scheme"
... ),
... NestedField(
- ... field_id=501, name="manifest_length",
field_type=LongType(), required=False, doc="Total file size in bytes"
+ ... field_id=501, name="manifest_length",
field_type=LongType(),
+ ... required=False, doc="Total file size in bytes"
... ),
... schema_id=1
... )
diff --git a/ruff.toml b/ruff.toml
index 1ce05228..257556a4 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -57,9 +57,7 @@ select = [
"I", # isort
"UP", # pyupgrade
]
-ignore = [
- "E501"
-]
+ignore = []
# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = ["ALL"]
diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py
index 1a3978a0..88b653e4 100644
--- a/tests/catalog/test_hive.py
+++ b/tests/catalog/test_hive.py
@@ -970,7 +970,10 @@ def test_rename_table_to_namespace_does_not_exists() ->
None:
catalog._client = MagicMock()
catalog._client.__enter__().alter_table_with_environment_context.side_effect =
InvalidOperationException(
- message="Unable to change partition or table. Database default does
not exist Check metastore logs for detailed stack.does_not_exists"
+ message=(
+ "Unable to change partition or table. Database default does not
exist "
+ "Check metastore logs for detailed stack.does_not_exists"
+ )
)
with pytest.raises(NoSuchNamespaceError) as exc_info:
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 71341c8b..9fb1fa9a 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -156,7 +156,8 @@ def test_no_uri_supplied() -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_200(rest_mock: Mocker) -> None:
rest_mock.post(
@@ -179,7 +180,8 @@ def test_token_200(rest_mock: Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_200_without_optional_fields(rest_mock: Mocker) -> None:
rest_mock.post(
@@ -198,7 +200,8 @@ def test_token_200_without_optional_fields(rest_mock:
Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_with_optional_oauth_params(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
@@ -223,7 +226,8 @@ def test_token_with_optional_oauth_params(rest_mock:
Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
@@ -246,7 +250,8 @@ def
test_token_with_optional_oauth_params_as_empty(rest_mock: Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_with_default_scope(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
@@ -267,7 +272,8 @@ def test_token_with_default_scope(rest_mock: Mocker) ->
None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_with_custom_scope(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
@@ -289,7 +295,8 @@ def test_token_with_custom_scope(rest_mock: Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_200_w_oauth2_server_uri(rest_mock: Mocker) -> None:
rest_mock.post(
@@ -314,7 +321,8 @@ def test_token_200_w_oauth2_server_uri(rest_mock: Mocker)
-> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_config_200(requests_mock: Mocker) -> None:
requests_mock.get(
@@ -402,7 +410,8 @@ def test_config_sets_headers(requests_mock: Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_400(rest_mock: Mocker) -> None:
rest_mock.post(
@@ -418,7 +427,8 @@ def test_token_400(rest_mock: Mocker) -> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_token_401(rest_mock: Mocker) -> None:
message = "invalid_client"
@@ -664,7 +674,8 @@ def test_list_namespace_with_parent_404(rest_mock: Mocker)
-> None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
@pytest.mark.parametrize("status_code", [401, 419])
def test_list_namespaces_token_expired_success_on_retries(rest_mock: Mocker,
status_code: int) -> None:
@@ -2017,7 +2028,8 @@ def test_custom_namespace_separator(rest_mock: Mocker) ->
None:
@pytest.mark.filterwarnings(
- "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
+ "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. "
+ "Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
)
def test_auth_header(rest_mock: Mocker) -> None:
mock_request = rest_mock.post(
diff --git a/tests/cli/test_console.py b/tests/cli/test_console.py
index a713975e..f4b343f6 100644
--- a/tests/cli/test_console.py
+++ b/tests/cli/test_console.py
@@ -627,9 +627,10 @@ def test_json_schema(catalog: InMemoryCatalog) -> None:
runner = CliRunner()
result = runner.invoke(run, ["--output=json", "schema",
"default.my_table"])
assert result.exit_code == 0
- assert (
- result.output
- ==
"""{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}\n"""
+ assert result.output == (
+
'{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},'
+ '{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},'
+
'{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}\n'
)
@@ -819,9 +820,8 @@ def
test_json_properties_get_table_specific_property_that_doesnt_exist(catalog:
runner = CliRunner()
result = runner.invoke(run, ["--output=json", "properties", "get",
"table", "default.my_table", "doesnotexist"])
assert result.exit_code == 1
- assert (
- result.output
- == """{"type": "NoSuchPropertyException", "message": "Could not find
property doesnotexist on table default.my_table"}\n"""
+ assert result.output == (
+ '{"type": "NoSuchPropertyException", "message": "Could not find
property doesnotexist on table default.my_table"}\n'
)
diff --git a/tests/conftest.py b/tests/conftest.py
index 9d80f489..ca9175f2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1188,7 +1188,10 @@ manifest_entry_records = [
"status": 1,
"snapshot_id": 8744736658442914487,
"data_file": {
- "file_path":
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
+ "file_path": (
+
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/"
+ "00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
+ ),
"file_format": "PARQUET",
"partition": {"VendorID": 1, "tpep_pickup_day": 1925},
"record_count": 19513,
@@ -1308,7 +1311,10 @@ manifest_entry_records = [
"status": 1,
"snapshot_id": 8744736658442914487,
"data_file": {
- "file_path":
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet",
+ "file_path": (
+
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/"
+ "00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet"
+ ),
"file_format": "PARQUET",
"partition": {"VendorID": 1, "tpep_pickup_datetime": None},
"record_count": 95050,
@@ -2146,7 +2152,10 @@ def adls_fsspec_fileio(request: pytest.FixtureRequest)
-> Generator[FsspecFileIO
azurite_url = request.config.getoption("--adls.endpoint")
azurite_account_name = request.config.getoption("--adls.account-name")
azurite_account_key = request.config.getoption("--adls.account-key")
- azurite_connection_string =
f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
+ azurite_connection_string = (
+ f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};"
+
f"AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
+ )
properties = {
"adls.connection-string": azurite_connection_string,
"adls.account-name": azurite_account_name,
@@ -2183,7 +2192,10 @@ def pyarrow_fileio_adls(request: pytest.FixtureRequest)
-> Generator[Any, None,
azurite_account_name = request.config.getoption("--adls.account-name")
azurite_account_key = request.config.getoption("--adls.account-key")
- azurite_connection_string =
f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
+ azurite_connection_string = (
+ f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};"
+
f"AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
+ )
properties = {
ADLS_ACCOUNT_NAME: azurite_account_name,
ADLS_ACCOUNT_KEY: azurite_account_key,
@@ -2623,7 +2635,8 @@ TEST_DATA_WITH_NULL = {
# Not supported by Spark
# 'time': [time(1, 22, 0), None, time(19, 25, 0)],
# Not natively supported by Arrow
- # 'uuid': [uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None,
uuid.UUID('11111111-1111-1111-1111-111111111111').bytes],
+ # 'uuid': [uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None,
+ # uuid.UUID('11111111-1111-1111-1111-111111111111').bytes],
"binary": [b"\01", None, b"\22"],
"fixed": [
uuid.UUID("00000000-0000-0000-0000-000000000000").bytes,
@@ -2676,7 +2689,8 @@ def arrow_table_with_null(pa_schema: "pa.Schema") ->
"pa.Table":
"long": [1, None, 9],
"float": [0.0, None, 0.9],
"double": [0.0, None, 0.9],
- # 'time': [1_000_000, None, 3_000_000], # Example times: 1s,
none, and 3s past midnight #Spark does not support time fields
+ # 'time': [1_000_000, None, 3_000_000], # Example times: 1s,
none, and 3s past midnight
+ # Spark does not support time fields
"timestamp": [datetime(2023, 1, 1, 19, 25, 00), None,
datetime(2023, 3, 1, 19, 25, 00)],
"timestamptz": [
datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc),
@@ -2687,7 +2701,8 @@ def arrow_table_with_null(pa_schema: "pa.Schema") ->
"pa.Table":
# Not supported by Spark
# 'time': [time(1, 22, 0), None, time(19, 25, 0)],
# Not natively supported by Arrow
- # 'uuid':
[uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None,
uuid.UUID('11111111-1111-1111-1111-111111111111').bytes],
+ # 'uuid':
[uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None,
+ #
uuid.UUID('11111111-1111-1111-1111-111111111111').bytes],
"binary": [b"\01", None, b"\22"],
"fixed": [
uuid.UUID("00000000-0000-0000-0000-000000000000").bytes,
diff --git a/tests/expressions/test_evaluator.py
b/tests/expressions/test_evaluator.py
index 5be3e92b..03a33e4c 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -1427,7 +1427,9 @@ def test_strict_integer_in(strict_data_file_schema:
Schema, strict_data_file_1:
def test_strict_integer_not_in(strict_data_file_schema: Schema,
strict_data_file_1: DataFile) -> None:
- # should_read = _StrictMetricsEvaluator(strict_data_file_schema,
NotIn("id", {INT_MIN_VALUE - 25, INT_MIN_VALUE - 24})).eval(strict_data_file_1)
+ # should_read = _StrictMetricsEvaluator(
+ # strict_data_file_schema, NotIn("id", {INT_MIN_VALUE - 25,
INT_MIN_VALUE - 24})
+ # ).eval(strict_data_file_1)
# assert should_read, "Should match: all values != 5 and != 6"
should_read = _StrictMetricsEvaluator(strict_data_file_schema, NotIn("id",
{INT_MIN_VALUE - 1, INT_MIN_VALUE})).eval(
diff --git a/tests/expressions/test_visitors.py
b/tests/expressions/test_visitors.py
index 798e9f64..7687d2e5 100644
--- a/tests/expressions/test_visitors.py
+++ b/tests/expressions/test_visitors.py
@@ -92,10 +92,10 @@ from pyiceberg.types import (
class ExampleVisitor(BooleanExpressionVisitor[list[str]]):
- """A test implementation of a BooleanExpressionVisitor
+ """A test implementation of a BooleanExpressionVisitor.
- As this visitor visits each node, it appends an element to a
`visit_history` list. This enables testing that a given expression is
- visited in an expected order by the `visit` method.
+ As this visitor visits each node, it appends an element to a
`visit_history` list.
+ This enables testing that a given expression is visited in an expected
order by the `visit` method.
"""
def __init__(self) -> None:
@@ -131,9 +131,10 @@ class ExampleVisitor(BooleanExpressionVisitor[list[str]]):
class
FooBoundBooleanExpressionVisitor(BoundBooleanExpressionVisitor[list[str]]):
- """A test implementation of a BoundBooleanExpressionVisitor
- As this visitor visits each node, it appends an element to a
`visit_history` list. This enables testing that a given bound expression is
- visited in an expected order by the `visit` method.
+ """A test implementation of a BoundBooleanExpressionVisitor.
+
+ As this visitor visits each node, it appends an element to a
`visit_history` list.
+ This enables testing that a given bound expression is visited in an
expected order by the `visit` method.
"""
def __init__(self) -> None:
@@ -260,9 +261,10 @@ def test_bind_visitor_already_bound(table_schema_simple:
Schema) -> None:
with pytest.raises(TypeError) as exc_info:
visit(bound, visitor=BindVisitor(schema=table_schema_simple,
case_sensitive=True))
assert (
- "Found already bound predicate:
BoundEqualTo(term=BoundReference(field=NestedField(field_id=1, name='foo',
field_type=StringType(), required=False),
accessor=Accessor(position=0,inner=None)), literal=literal('hello'))"
- == str(exc_info.value)
- )
+ "Found already bound predicate: BoundEqualTo(term=BoundReference("
+ "field=NestedField(field_id=1, name='foo', field_type=StringType(),
required=False), "
+ "accessor=Accessor(position=0,inner=None)), literal=literal('hello'))"
+ ) == str(exc_info.value)
def test_visit_bound_visitor_unknown_predicate() -> None:
diff --git a/tests/integration/test_add_files.py
b/tests/integration/test_add_files.py
index 86ef05e5..5c529e6e 100644
--- a/tests/integration/test_add_files.py
+++ b/tests/integration/test_add_files.py
@@ -470,8 +470,8 @@ def test_add_files_to_bucket_partitioned_table_fails(spark:
SparkSession, sessio
with pytest.raises(ValueError) as exc_info:
tbl.add_files(file_paths=file_paths)
assert (
- "Cannot infer partition value from parquet metadata for a non-linear
Partition Field: baz_bucket_3 with transform bucket[3]"
- in str(exc_info.value)
+ "Cannot infer partition value from parquet metadata for a non-linear
Partition Field: "
+ "baz_bucket_3 with transform bucket[3]" in str(exc_info.value)
)
@@ -518,8 +518,8 @@ def
test_add_files_to_partitioned_table_fails_with_lower_and_upper_mismatch(
with pytest.raises(ValueError) as exc_info:
tbl.add_files(file_paths=file_paths)
assert (
- "Cannot infer partition value from parquet metadata as there are more
than one partition values for Partition Field: baz. lower_value=123,
upper_value=124"
- in str(exc_info.value)
+ "Cannot infer partition value from parquet metadata as there are more
than one partition values "
+ "for Partition Field: baz. lower_value=123, upper_value=124" in
str(exc_info.value)
)
@@ -754,8 +754,8 @@ def
test_add_files_with_timestamp_tz_ns_fails(session_catalog: Catalog, format_v
exception_cause = exc_info.value.__cause__
assert isinstance(exception_cause, TypeError)
assert (
- "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' configuration property to automatically
downcast 'ns' to 'us' on write."
- in exception_cause.args[0]
+ "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' "
+ "configuration property to automatically downcast 'ns' to 'us' on
write." in exception_cause.args[0]
)
diff --git a/tests/integration/test_inspect_table.py
b/tests/integration/test_inspect_table.py
index ea0cca9b..b40ba7e1 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -1096,10 +1096,21 @@ def test_inspect_files_format_version_3(spark:
SparkSession, session_catalog: Ca
},
)
+ row1 = (
+ "(false, 'a', 'aaaaaaaaaaaaaaaaaaaaaa', 1, 1, 0.0, 0.0, "
+ "TIMESTAMP('2023-01-01 19:25:00'), TIMESTAMP('2023-01-01
19:25:00+00:00'), "
+ "DATE('2023-01-01'), X'01', X'00000000000000000000000000000000')"
+ )
+ row2 = "(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL)"
+ row3 = (
+ "(true, 'z', 'zzzzzzzzzzzzzzzzzzzzzz', 9, 9, 0.9, 0.9, "
+ "TIMESTAMP('2023-03-01 19:25:00'), TIMESTAMP('2023-03-01
19:25:00+00:00'), "
+ "DATE('2023-03-01'), X'12', X'11111111111111111111111111111111')"
+ )
insert_data_sql = f"""INSERT INTO {identifier} VALUES
- (false, 'a', 'aaaaaaaaaaaaaaaaaaaaaa', 1, 1, 0.0, 0.0,
TIMESTAMP('2023-01-01 19:25:00'), TIMESTAMP('2023-01-01 19:25:00+00:00'),
DATE('2023-01-01'), X'01', X'00000000000000000000000000000000'),
- (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL),
- (true, 'z', 'zzzzzzzzzzzzzzzzzzzzzz', 9, 9, 0.9, 0.9,
TIMESTAMP('2023-03-01 19:25:00'), TIMESTAMP('2023-03-01 19:25:00+00:00'),
DATE('2023-03-01'), X'12', X'11111111111111111111111111111111');
+ {row1},
+ {row2},
+ {row3};
"""
spark.sql(insert_data_sql)
diff --git a/tests/integration/test_partitioning_key.py
b/tests/integration/test_partitioning_key.py
index 0419fcf2..f91efc5a 100644
--- a/tests/integration/test_partitioning_key.py
+++ b/tests/integration/test_partitioning_key.py
@@ -77,7 +77,10 @@ identifier = "default.test_table"
@pytest.mark.parametrize(
- "partition_fields, partition_values, expected_partition_record,
expected_hive_partition_path_slice, spark_create_table_sql_for_justification,
spark_data_insert_sql_for_justification",
+ (
+ "partition_fields, partition_values, expected_partition_record,
expected_hive_partition_path_slice, "
+ "spark_create_table_sql_for_justification,
spark_data_insert_sql_for_justification"
+ ),
[
# Identity Transform
(
@@ -161,7 +164,8 @@ identifier = "default.test_table"
[3.14],
Record(3.14),
"float_field=3.14",
- # spark writes differently as pyiceberg,
Record[float_field=3.140000104904175], path:float_field=3.14 (Record has
difference)
+            # spark writes differently from pyiceberg,
Record[float_field=3.140000104904175],
+            # path:float_field=3.14 (Record has difference)
# so justification (compare expected value with spark behavior)
would fail.
None,
None,
@@ -184,7 +188,8 @@ identifier = "default.test_table"
[6.282],
Record(6.282),
"double_field=6.282",
- # spark writes differently as pyiceberg,
Record[double_field=6.2820000648498535] path:double_field=6.282 (Record has
difference)
+            # spark writes differently from pyiceberg,
Record[double_field=6.2820000648498535]
+            # path:double_field=6.282 (Record has difference)
# so justification (compare expected value with spark behavior)
would fail.
None,
None,
@@ -246,8 +251,10 @@ identifier = "default.test_table"
Record(1672574400000000),
"timestamp_field=2023-01-01T12%3A00%3A00",
# Spark writes differently as pyiceberg, so justification (compare
expected value with spark behavior) would fail
- # AssertionError: assert 'timestamp_field=2023-01-01T12%3A00%3A00'
in
's3://warehouse/default/test_table/data/timestamp_field=2023-01-01T12%3A00/00000-5-f9dca69a-9fb7-4830-9ef6-62d3d7afc09e-00001.parquet'
- # TLDR: CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ) becomes
2023-01-01T12:00 in the hive partition path when spark writes it (without the
seconds).
+ # AssertionError: assert 'timestamp_field=2023-01-01T12%3A00%3A00'
in
+ #
's3://warehouse/default/test_table/data/timestamp_field=2023-01-01T12%3A00/...-00001.parquet'
+ # TLDR: CAST('2023-01-01 12:00:00' AS TIMESTAMP_NTZ) becomes
2023-01-01T12:00 in the hive partition path
+ # when spark writes it (without the seconds).
None,
None,
# f"""CREATE TABLE {identifier} (
@@ -270,8 +277,10 @@ identifier = "default.test_table"
Record(1672563601000999),
"timestamptz_field=2023-01-01T09%3A00%3A01.000999%2B00%3A00",
# Spark writes differently as pyiceberg, so justification (compare
expected value with spark behavior) would fail
- # AssertionError: assert
'timestamptz_field=2023-01-01T09%3A00%3A01.000999%2B00%3A00' in
's3://warehouse/default/test_table/data/timestamptz_field=2023-01-01T09%3A00%3A01.000999Z/00000-5-b710fc4d-66b6-47f1-b8ae-6208f8aaa2d4-00001.parquet'
- # TLDR: CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP)
becomes 2023-01-01T09:00:01.000999Z in the hive partition path when spark
writes it (while iceberg: timestamptz_field=2023-01-01T09:00:01.000999+00:00).
+ # AssertionError: assert
'timestamptz_field=2023-01-01T09%3A00%3A01.000999%2B00%3A00' in
+ #
's3://warehouse/default/test_table/data/timestamptz_field=2023-01-01T09%3A00%3A01.000999Z/...-00001.parquet'
+ # TLDR: CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP)
becomes 2023-01-01T09:00:01.000999Z in the hive
+ # partition path when spark writes it (while iceberg:
timestamptz_field=2023-01-01T09:00:01.000999+00:00).
None,
None,
# f"""CREATE TABLE {identifier} (
@@ -285,7 +294,8 @@ identifier = "default.test_table"
# """,
# f"""INSERT INTO {identifier}
# VALUES
- # (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP),
'Associated string value for timestamp 2023-01-01 12:00:01.000999+03:00')
+ # (CAST('2023-01-01 12:00:01.000999+03:00' AS TIMESTAMP),
+ # 'Associated string value for timestamp 2023-01-01
12:00:01.000999+03:00')
# """
),
(
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index 8811330a..c123ddb7 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -1307,7 +1307,8 @@ def test_scan_source_field_missing_in_spec(catalog:
Catalog, spark: SparkSession
spark.sql(f"DROP TABLE IF EXISTS {identifier}")
spark.sql(f"CREATE TABLE {identifier} (foo int, bar int, jaz string) USING
ICEBERG PARTITIONED BY (foo, bar)")
spark.sql(
- f"INSERT INTO {identifier} (foo, bar, jaz) VALUES (1, 1, 'dummy
data'), (1, 2, 'dummy data again'), (2, 1, 'another partition')"
+ f"INSERT INTO {identifier} (foo, bar, jaz) VALUES "
+ f"(1, 1, 'dummy data'), (1, 2, 'dummy data again'), (2, 1, 'another
partition')"
)
spark.sql(f"ALTER TABLE {identifier} DROP PARTITION FIELD foo")
spark.sql(f"ALTER TABLE {identifier} DROP COLUMN foo")
diff --git a/tests/integration/test_writes/test_partitioned_writes.py
b/tests/integration/test_writes/test_partitioned_writes.py
index d194669b..2bc49856 100644
--- a/tests/integration/test_writes/test_partitioned_writes.py
+++ b/tests/integration/test_writes/test_partitioned_writes.py
@@ -372,7 +372,9 @@ def test_dynamic_partition_overwrite_non_identity_transform(
)
with pytest.raises(
ValueError,
- match="For now dynamic overwrite does not support a table with
non-identity-transform field in the latest partition spec: *",
+ match=(
+ "For now dynamic overwrite does not support a table with
non-identity-transform field in the latest partition spec: *"
+ ),
):
tbl.dynamic_partition_overwrite(arrow_table_with_null.slice(0, 1))
@@ -591,7 +593,8 @@ def test_data_files_with_table_partitioned_with_null(
# the first snapshot generates M3 with 6 delete data
entries collected from M1 and M2.
# ML3 = [M3]
#
- # The second snapshot generates M4 with 3 appended data
entries and since M3 (previous manifests) only has delete entries it does not
lint to it.
+                # The second snapshot generates M4 with 3 appended data
entries and since M3 (previous manifests)
+                # only has delete entries it does not link to it.
# ML4 = [M4]
# Append : Append generates M5 with new data entries and links
to all previous manifests which is M4 .
@@ -603,8 +606,8 @@ def test_data_files_with_table_partitioned_with_null(
# then it generates M7 as remaining existing entries
from M1 and M8 from M2
# ML6 = [M6, M7, M8]
#
- # The second snapshot generates M9 with 3 appended data
entries and it also looks at manifests in ML6 (previous manifests)
- # it ignores M6 since it only has delete entries but it
links to M7 and M8.
+ # The second snapshot generates M9 with 3 appended data
entries and it also looks at manifests
+ # in ML6 (previous manifests) it ignores M6 since it
only has delete entries but it links to M7 and M8.
# ML7 = [M9, M7, M8]
# tldr:
diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py
index 9e4a1da5..94eff1cb 100644
--- a/tests/io/test_fsspec.py
+++ b/tests/io/test_fsspec.py
@@ -812,7 +812,9 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
"uri": new_uri,
"headers": {
"Authorization": [
- "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request,
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token,
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
+ "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, "
+
"SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, "
+
"Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
],
"Host": ["bucket.s3.us-west-2.amazonaws.com"],
"User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"],
@@ -849,11 +851,25 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
assert request.url == new_uri
assert dict(request.headers) == {
- "Authorization": "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request,
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token,
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
+ "Authorization": (
+ "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, "
+
"SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, "
+
"Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
+ ),
"Host": "bucket.s3.us-west-2.amazonaws.com",
"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0",
"X-Amz-Date": "20221017T102940Z",
- "X-Amz-Security-Token":
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0
[...]
+ "X-Amz-Security-Token": (
+
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd"
+
"/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8Z"
+
"jK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0"
+
"gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293v"
+
"pbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01e"
+
"QAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3"
+
"ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwRE"
+
"f1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/"
+ "5Gw="
+ ),
"x-amz-content-sha256": "UNSIGNED-PAYLOAD",
"X-Custom-Header": "value",
}
@@ -868,7 +884,9 @@ def test_s3v4_rest_signer_endpoint(requests_mock: Mocker)
-> None:
"uri": new_uri,
"headers": {
"Authorization": [
- "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request,
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token,
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
+ "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, "
+
"SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, "
+
"Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
],
"Host": ["bucket.s3.us-west-2.amazonaws.com"],
"User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"],
@@ -904,11 +922,25 @@ def test_s3v4_rest_signer_endpoint(requests_mock: Mocker)
-> None:
assert request.url == new_uri
assert dict(request.headers) == {
- "Authorization": "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request,
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token,
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
+ "Authorization": (
+ "AWS4-HMAC-SHA256
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, "
+
"SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, "
+
"Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
+ ),
"Host": "bucket.s3.us-west-2.amazonaws.com",
"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0",
"X-Amz-Date": "20221017T102940Z",
- "X-Amz-Security-Token":
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0
[...]
+ "X-Amz-Security-Token": (
+
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd"
+
"/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8Z"
+
"jK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0"
+
"gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293v"
+
"pbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01e"
+
"QAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3"
+
"ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwRE"
+
"f1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/"
+ "5Gw="
+ ),
"x-amz-content-sha256": "UNSIGNED-PAYLOAD",
}
@@ -946,8 +978,9 @@ def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None:
signer(request)
assert (
- """Failed to sign request 401: {'method': 'HEAD', 'region':
'us-west-2', 'uri':
'https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro',
'headers': {'User-Agent': ['Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0']}}"""
- in str(exc_info.value)
+ "Failed to sign request 401: {'method': 'HEAD', 'region': 'us-west-2',
"
+ "'uri':
'https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro',
"
+ "'headers': {'User-Agent': ['Botocore/1.27.59 Python/3.10.7
Darwin/21.5.0']}}" in str(exc_info.value)
)
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e815ea9d..89c11435 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -1195,7 +1195,8 @@ def test_projection_concat_files(schema_int: Schema, file_int: str) -> None:
def test_identity_transform_column_projection(tmp_path: str, catalog:
InMemoryCatalog) -> None:
- # Test by adding a non-partitioned data file to a partitioned table,
verifying partition value projection from manifest metadata.
+ # Test by adding a non-partitioned data file to a partitioned table,
verifying partition value
+ # projection from manifest metadata.
# TODO: Update to use a data file created by writing data to an
unpartitioned table once add_files supports field IDs.
# (context:
https://github.com/apache/iceberg-python/pull/1443#discussion_r1901374875)
@@ -1264,7 +1265,8 @@ def test_identity_transform_column_projection(tmp_path: str, catalog: InMemoryCa
def test_identity_transform_columns_projection(tmp_path: str, catalog:
InMemoryCatalog) -> None:
- # Test by adding a non-partitioned data file to a multi-partitioned table,
verifying partition value projection from manifest metadata.
+ # Test by adding a non-partitioned data file to a multi-partitioned table,
verifying partition value
+ # projection from manifest metadata.
# TODO: Update to use a data file created by writing data to an
unpartitioned table once add_files supports field IDs.
# (context:
https://github.com/apache/iceberg-python/pull/1443#discussion_r1901374875)
schema = Schema(
@@ -1542,16 +1544,19 @@ def test_projection_maps_of_structs(schema_map_of_structs: Schema, file_map_of_s
strict=True,
):
assert actual.as_py() == expected
- assert (
- repr(result_table.schema)
- == """locations: map<string, struct<latitude: double not null,
longitude: double not null, altitude: double>>
- child 0, entries: struct<key: string not null, value: struct<latitude:
double not null, longitude: double not null, al (... 25 chars omitted) not null
- child 0, key: string not null
- child 1, value: struct<latitude: double not null, longitude: double not
null, altitude: double> not null
- child 0, latitude: double not null
- child 1, longitude: double not null
- child 2, altitude: double"""
+ expected_schema_repr = (
+ "locations: map<string, struct<latitude: double not null, "
+ "longitude: double not null, altitude: double>>\n"
+ " child 0, entries: struct<key: string not null, value:
struct<latitude: double not null, "
+ "longitude: double not null, al (... 25 chars omitted) not null\n"
+ " child 0, key: string not null\n"
+ " child 1, value: struct<latitude: double not null, longitude:
double not null, "
+ "altitude: double> not null\n"
+ " child 0, latitude: double not null\n"
+ " child 1, longitude: double not null\n"
+ " child 2, altitude: double"
)
+ assert repr(result_table.schema) == expected_schema_repr
def test_projection_nested_struct_different_parent_id(file_struct: str) ->
None:
@@ -2785,7 +2790,8 @@ def test__to_requested_schema_integer_promotion(
def test_pyarrow_file_io_fs_by_scheme_cache() -> None:
- # It's better to set up multi-region minio servers for an integration test
once `endpoint_url` argument becomes available for `resolve_s3_region`
+ # It's better to set up multi-region minio servers for an integration test
once `endpoint_url` argument
+ # becomes available for `resolve_s3_region`
# Refer to: https://github.com/apache/arrow/issues/43713
pyarrow_file_io = PyArrowFileIO()
@@ -2819,7 +2825,8 @@ def test_pyarrow_file_io_fs_by_scheme_cache() -> None:
def test_pyarrow_io_new_input_multi_region(caplog: Any) -> None:
- # It's better to set up multi-region minio servers for an integration test
once `endpoint_url` argument becomes available for `resolve_s3_region`
+ # It's better to set up multi-region minio servers for an integration test
once `endpoint_url` argument
+ # becomes available for `resolve_s3_region`
# Refer to: https://github.com/apache/arrow/issues/43713
user_provided_region = "ap-southeast-1"
bucket_regions = [
@@ -3030,7 +3037,8 @@ def test_iceberg_read_orc(tmp_path: Path) -> None:
partition_specs=[PartitionSpec()],
properties={
"write.format.default": "parquet", # This doesn't matter for
reading
- "schema.name-mapping.default": '[{"field-id": 1, "names": ["id"]},
{"field-id": 2, "names": ["name"]}]', # Add name mapping for ORC files without
field IDs
+ # Add name mapping for ORC files without field IDs
+ "schema.name-mapping.default": ('[{"field-id": 1, "names":
["id"]}, {"field-id": 2, "names": ["name"]}]'),
},
)
io = PyArrowFileIO()
@@ -3130,7 +3138,10 @@ def test_orc_row_filtering_predicate_pushdown(tmp_path: Path) -> None:
schemas=[schema],
partition_specs=[PartitionSpec()],
properties={
- "schema.name-mapping.default": '[{"field-id": 1, "names": ["id"]},
{"field-id": 2, "names": ["name"]}, {"field-id": 3, "names": ["age"]},
{"field-id": 4, "names": ["active"]}]',
+ "schema.name-mapping.default": (
+ '[{"field-id": 1, "names": ["id"]}, {"field-id": 2, "names":
["name"]}, '
+ '{"field-id": 3, "names": ["age"]}, {"field-id": 4, "names":
["active"]}]'
+ ),
},
)
io = PyArrowFileIO()
@@ -4410,7 +4421,8 @@ def test_orc_stripe_size_batch_size_compression_interaction(tmp_path: Path) -> N
# Assert batching behavior
assert len(batches) > 0, f"Should have at least one batch for
{description}"
assert len(batches) == actual_stripes, (
- f"Number of batches should match number of stripes for
{description}: {len(batches)} batches vs {actual_stripes} stripes"
+ f"Number of batches should match number of stripes for
{description}: "
+ f"{len(batches)} batches vs {actual_stripes} stripes"
)
# Assert data integrity
@@ -4480,7 +4492,10 @@ def test_orc_near_perfect_stripe_size_mapping(tmp_path: Path) -> None:
{
"id": pa.array(
[
-
f"very_long_string_value_{i:06d}_with_lots_of_padding_to_make_it_harder_to_compress_{i
* 7919 % 100000:05d}_more_padding_{i * 7919 % 100000:05d}"
+ (
+
f"very_long_string_value_{i:06d}_with_lots_of_padding_to_make_it_harder_to_compress_"
+ f"{i * 7919 % 100000:05d}_more_padding_{i * 7919 %
100000:05d}"
+ )
for i in range(1, 50001)
]
) # 50K rows
@@ -4555,7 +4570,8 @@ def test_orc_near_perfect_stripe_size_mapping(tmp_path: Path) -> None:
# Assert batching behavior
assert len(batches) > 0, f"Should have at least one batch for
stripe_size={stripe_size}"
assert len(batches) == actual_stripes, (
- f"Number of batches should match number of stripes for
stripe_size={stripe_size}: {len(batches)} batches vs {actual_stripes} stripes"
+ f"Number of batches should match number of stripes for
stripe_size={stripe_size}: "
+ f"{len(batches)} batches vs {actual_stripes} stripes"
)
# Assert data integrity
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index 59a48576..433350c2 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -190,7 +190,8 @@ def test_pyarrow_timestamp_invalid_units() -> None:
with pytest.raises(
TypeError,
match=re.escape(
- "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' configuration property to automatically
downcast 'ns' to 'us' on write."
+ "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' "
+ "configuration property to automatically downcast 'ns' to 'us' on
write."
),
):
visit_pyarrow(pyarrow_type, _ConvertToIceberg())
@@ -212,7 +213,8 @@ def test_pyarrow_timestamp_tz_invalid_units() -> None:
with pytest.raises(
TypeError,
match=re.escape(
- "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' configuration property to automatically
downcast 'ns' to 'us' on write."
+ "Iceberg does not yet support 'ns' timestamp precision. Use
'downcast-ns-timestamp-to-us-on-write' "
+ "configuration property to automatically downcast 'ns' to 'us' on
write."
),
):
visit_pyarrow(pyarrow_type, _ConvertToIceberg())
@@ -832,8 +834,13 @@ def test_expression_to_complementary_pyarrow(
Not(bound_is_null_double_field),
)
result = _expression_to_complementary_pyarrow(bound_expr)
- # Notice an isNan predicate on a str column is automatically converted to
always false and removed from Or and thus will not appear in the pc.expr.
- assert (
- repr(result)
- == """<pyarrow.compute.Expression (((invert(((((string_field ==
"hello") and (float_field > 100)) or ((is_nan(float_field) and (double_field ==
0)) or (float_field > 100))) and invert(is_null(double_field,
{nan_is_null=false})))) or is_null(float_field, {nan_is_null=false})) or
is_null(string_field, {nan_is_null=false})) or is_nan(double_field))>"""
+ # Notice an isNan predicate on a str column is automatically converted to
always false and removed from Or
+ # and thus will not appear in the pc.expr.
+ expected_repr = (
+ '<pyarrow.compute.Expression (((invert(((((string_field == "hello")
and (float_field > 100)) '
+ "or ((is_nan(float_field) and (double_field == 0)) or (float_field >
100))) "
+ "and invert(is_null(double_field, {nan_is_null=false})))) "
+ "or is_null(float_field, {nan_is_null=false})) "
+ "or is_null(string_field, {nan_is_null=false})) or
is_nan(double_field))>"
)
+ assert repr(result) == expected_repr
diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py
index c567f3ff..e8de5f18 100644
--- a/tests/table/test_name_mapping.py
+++ b/tests/table/test_name_mapping.py
@@ -192,16 +192,20 @@ def test_json_mapped_field_no_field_id_serialization() -> None:
]
)
- assert (
- table_name_mapping_nested_no_field_id.model_dump_json()
- ==
"""[{"names":["foo"],"field-id":1},{"names":["bar"]},{"names":["qux"],"field-id":2,"fields":[{"names":["element"]}]}]"""
+ assert table_name_mapping_nested_no_field_id.model_dump_json() == (
+
'[{"names":["foo"],"field-id":1},{"names":["bar"]},{"names":["qux"],"field-id":2,"fields":[{"names":["element"]}]}]'
)
def test_json_serialization(table_name_mapping_nested: NameMapping) -> None:
- assert (
- table_name_mapping_nested.model_dump_json()
- ==
"""[{"names":["foo"],"field-id":1},{"names":["bar"],"field-id":2},{"names":["baz"],"field-id":3},{"names":["qux"],"field-id":4,"fields":[{"names":["element"],"field-id":5}]},{"names":["quux"],"field-id":6,"fields":[{"names":["key"],"field-id":7},{"names":["value"],"field-id":8,"fields":[{"names":["key"],"field-id":9},{"names":["value"],"field-id":10}]}]},{"names":["location"],"field-id":11,"fields":[{"names":["element"],"field-id":12,"fields":[{"names":["latitude"],"field-id":
[...]
+ assert table_name_mapping_nested.model_dump_json() == (
+
'[{"names":["foo"],"field-id":1},{"names":["bar"],"field-id":2},{"names":["baz"],"field-id":3},'
+
'{"names":["qux"],"field-id":4,"fields":[{"names":["element"],"field-id":5}]},'
+
'{"names":["quux"],"field-id":6,"fields":[{"names":["key"],"field-id":7},'
+
'{"names":["value"],"field-id":8,"fields":[{"names":["key"],"field-id":9},{"names":["value"],"field-id":10}]}]},'
+
'{"names":["location"],"field-id":11,"fields":[{"names":["element"],"field-id":12,"fields":'
+
'[{"names":["latitude"],"field-id":13},{"names":["longitude"],"field-id":14}]}]},'
+
'{"names":["person"],"field-id":15,"fields":[{"names":["name"],"field-id":16},{"names":["age"],"field-id":17}]}]'
)
diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py
index 576297c6..cfe3e2c1 100644
--- a/tests/table/test_partitioning.py
+++ b/tests/table/test_partitioning.py
@@ -120,9 +120,10 @@ def test_serialize_partition_spec() -> None:
PartitionField(source_id=2, field_id=1001,
transform=BucketTransform(num_buckets=25), name="int_bucket"),
spec_id=3,
)
- assert (
- partitioned.model_dump_json()
- ==
"""{"spec-id":3,"fields":[{"source-id":1,"field-id":1000,"transform":"truncate[19]","name":"str_truncate"},{"source-id":2,"field-id":1001,"transform":"bucket[25]","name":"int_bucket"}]}"""
+ assert partitioned.model_dump_json() == (
+ '{"spec-id":3,"fields":['
+
'{"source-id":1,"field-id":1000,"transform":"truncate[19]","name":"str_truncate"},'
+
'{"source-id":2,"field-id":1001,"transform":"bucket[25]","name":"int_bucket"}]}'
)
@@ -134,7 +135,11 @@ def test_deserialize_unpartition_spec() -> None:
def test_deserialize_partition_spec() -> None:
- json_partition_spec = """{"spec-id": 3, "fields": [{"source-id": 1,
"field-id": 1000, "transform": "truncate[19]", "name": "str_truncate"},
{"source-id": 2, "field-id": 1001, "transform": "bucket[25]", "name":
"int_bucket"}]}"""
+ json_partition_spec = (
+ '{"spec-id": 3, "fields": ['
+ '{"source-id": 1, "field-id": 1000, "transform": "truncate[19]",
"name": "str_truncate"}, '
+ '{"source-id": 2, "field-id": 1001, "transform": "bucket[25]", "name":
"int_bucket"}]}'
+ )
spec = PartitionSpec.model_validate_json(json_partition_spec)
diff --git a/tests/table/test_refs.py b/tests/table/test_refs.py
index e6b7006a..dbd0fee1 100644
--- a/tests/table/test_refs.py
+++ b/tests/table/test_refs.py
@@ -31,9 +31,9 @@ def test_snapshot_with_properties_repr() -> None:
max_ref_age_ms=10000000,
)
- assert (
- repr(snapshot_ref)
- == """SnapshotRef(snapshot_id=3051729675574597004,
snapshot_ref_type=SnapshotRefType.TAG, min_snapshots_to_keep=None,
max_snapshot_age_ms=None, max_ref_age_ms=10000000)"""
+ assert repr(snapshot_ref) == (
+ "SnapshotRef(snapshot_id=3051729675574597004,
snapshot_ref_type=SnapshotRefType.TAG, "
+ "min_snapshots_to_keep=None, max_snapshot_age_ms=None,
max_ref_age_ms=10000000)"
)
assert snapshot_ref == eval(repr(snapshot_ref))
diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py
index 4aa9521b..cfdc5162 100644
--- a/tests/table/test_snapshots.py
+++ b/tests/table/test_snapshots.py
@@ -79,9 +79,9 @@ def test_serialize_summary_with_properties() -> None:
def test_serialize_snapshot(snapshot: Snapshot) -> None:
- assert (
- snapshot.model_dump_json()
- ==
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}"""
+ assert snapshot.model_dump_json() == (
+
'{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,'
+
'"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}'
)
@@ -96,14 +96,17 @@ def test_serialize_snapshot_without_sequence_number() -> None:
schema_id=3,
)
actual = snapshot.model_dump_json()
- expected =
"""{"snapshot-id":25,"parent-snapshot-id":19,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}"""
+ expected = (
+
'{"snapshot-id":25,"parent-snapshot-id":19,"timestamp-ms":1602638573590,'
+
'"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}'
+ )
assert actual == expected
def test_serialize_snapshot_with_properties(snapshot_with_properties:
Snapshot) -> None:
- assert (
- snapshot_with_properties.model_dump_json()
- ==
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}"""
+ assert snapshot_with_properties.model_dump_json() == (
+
'{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,'
+
'"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}'
)
@@ -119,36 +122,45 @@ def test_deserialize_summary_with_properties() -> None:
def test_deserialize_snapshot(snapshot: Snapshot) -> None:
- payload = """{"snapshot-id": 25, "parent-snapshot-id": 19,
"sequence-number": 200, "timestamp-ms": 1602638573590, "manifest-list":
"s3:/a/b/c.avro", "summary": {"operation": "append"}, "schema-id": 3}"""
+ payload = (
+ '{"snapshot-id": 25, "parent-snapshot-id": 19, "sequence-number": 200,
"timestamp-ms": 1602638573590, '
+ '"manifest-list": "s3:/a/b/c.avro", "summary": {"operation":
"append"}, "schema-id": 3}'
+ )
actual = Snapshot.model_validate_json(payload)
assert actual == snapshot
def test_deserialize_snapshot_without_operation(snapshot: Snapshot) -> None:
- payload = """{"snapshot-id": 25, "parent-snapshot-id": 19,
"sequence-number": 200, "timestamp-ms": 1602638573590, "manifest-list":
"s3:/a/b/c.avro", "summary": {}, "schema-id": 3}"""
+ payload = (
+ '{"snapshot-id": 25, "parent-snapshot-id": 19, "sequence-number": 200,
"timestamp-ms": 1602638573590, '
+ '"manifest-list": "s3:/a/b/c.avro", "summary": {}, "schema-id": 3}'
+ )
with pytest.warns(UserWarning, match="Encountered invalid snapshot
summary: operation is missing, defaulting to overwrite"):
actual = Snapshot.model_validate_json(payload)
assert actual.summary.operation == Operation.OVERWRITE
def test_deserialize_snapshot_with_properties(snapshot_with_properties:
Snapshot) -> None:
- payload =
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}"""
+ payload = (
+
'{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,'
+
'"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}'
+ )
snapshot = Snapshot.model_validate_json(payload)
assert snapshot == snapshot_with_properties
def test_snapshot_repr(snapshot: Snapshot) -> None:
- assert (
- repr(snapshot)
- == """Snapshot(snapshot_id=25, parent_snapshot_id=19,
sequence_number=200, timestamp_ms=1602638573590,
manifest_list='s3:/a/b/c.avro', summary=Summary(Operation.APPEND),
schema_id=3)"""
+ assert repr(snapshot) == (
+ "Snapshot(snapshot_id=25, parent_snapshot_id=19, sequence_number=200,
timestamp_ms=1602638573590, "
+ "manifest_list='s3:/a/b/c.avro', summary=Summary(Operation.APPEND),
schema_id=3)"
)
assert snapshot == eval(repr(snapshot))
def test_snapshot_with_properties_repr(snapshot_with_properties: Snapshot) ->
None:
- assert (
- repr(snapshot_with_properties)
- == """Snapshot(snapshot_id=25, parent_snapshot_id=19,
sequence_number=200, timestamp_ms=1602638573590,
manifest_list='s3:/a/b/c.avro', summary=Summary(Operation.APPEND, **{'foo':
'bar'}), schema_id=3)"""
+ assert repr(snapshot_with_properties) == (
+ "Snapshot(snapshot_id=25, parent_snapshot_id=19, sequence_number=200,
timestamp_ms=1602638573590, "
+ "manifest_list='s3:/a/b/c.avro', summary=Summary(Operation.APPEND,
**{'foo': 'bar'}), schema_id=3)"
)
assert snapshot_with_properties == eval(repr(snapshot_with_properties))
@@ -226,7 +238,10 @@ def test_snapshot_summary_collector_with_partition() -> None:
"deleted-records": "300",
"changed-partition-count": "2",
"partition-summaries-included": "true",
- "partitions.int_field=1":
"added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100",
+ "partitions.int_field=1": (
+ "added-files-size=1234,removed-files-size=1234,added-data-files=1,"
+ "deleted-data-files=1,added-records=100,deleted-records=100"
+ ),
"partitions.int_field=2":
"removed-files-size=4321,deleted-data-files=1,deleted-records=200",
}
@@ -262,7 +277,10 @@ def test_snapshot_summary_collector_with_partition_limit_in_constructor() -> Non
"deleted-records": "300",
"changed-partition-count": "2",
"partition-summaries-included": "true",
- "partitions.int_field=1":
"added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100",
+ "partitions.int_field=1": (
+ "added-files-size=1234,removed-files-size=1234,added-data-files=1,"
+ "deleted-data-files=1,added-records=100,deleted-records=100"
+ ),
"partitions.int_field=2":
"removed-files-size=4321,deleted-data-files=1,deleted-records=200",
}
diff --git a/tests/table/test_sorting.py b/tests/table/test_sorting.py
index cb7a2c18..eaf6076a 100644
--- a/tests/table/test_sorting.py
+++ b/tests/table/test_sorting.py
@@ -53,12 +53,22 @@ def test_serialize_sort_order_unsorted() -> None:
def test_serialize_sort_order(sort_order: SortOrder) -> None:
- expected =
'{"order-id":22,"fields":[{"source-id":19,"transform":"identity","direction":"asc","null-order":"nulls-first"},{"source-id":25,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"},{"source-id":22,"transform":"void","direction":"asc","null-order":"nulls-first"}]}'
+ expected = (
+ '{"order-id":22,"fields":['
+
'{"source-id":19,"transform":"identity","direction":"asc","null-order":"nulls-first"},'
+
'{"source-id":25,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"},'
+
'{"source-id":22,"transform":"void","direction":"asc","null-order":"nulls-first"}]}'
+ )
assert sort_order.model_dump_json() == expected
def test_deserialize_sort_order(sort_order: SortOrder) -> None:
- payload = '{"order-id": 22, "fields": [{"source-id": 19, "transform":
"identity", "direction": "asc", "null-order": "nulls-first"}, {"source-id": 25,
"transform": "bucket[4]", "direction": "desc", "null-order": "nulls-last"},
{"source-id": 22, "transform": "void", "direction": "asc", "null-order":
"nulls-first"}]}'
+ payload = (
+ '{"order-id": 22, "fields": ['
+ '{"source-id": 19, "transform": "identity", "direction": "asc",
"null-order": "nulls-first"}, '
+ '{"source-id": 25, "transform": "bucket[4]", "direction": "desc",
"null-order": "nulls-last"}, '
+ '{"source-id": 22, "transform": "void", "direction": "asc",
"null-order": "nulls-first"}]}'
+ )
assert SortOrder.model_validate_json(payload) == sort_order
@@ -90,7 +100,16 @@ def test_sorting_to_string(sort_order: SortOrder) -> None:
def test_sorting_to_repr(sort_order: SortOrder) -> None:
- expected = """SortOrder(SortField(source_id=19,
transform=IdentityTransform(), direction=SortDirection.ASC,
null_order=NullOrder.NULLS_FIRST), SortField(source_id=25,
transform=BucketTransform(num_buckets=4), direction=SortDirection.DESC,
null_order=NullOrder.NULLS_LAST), SortField(source_id=22,
transform=VoidTransform(), direction=SortDirection.ASC,
null_order=NullOrder.NULLS_FIRST), order_id=22)"""
+ expected = (
+ "SortOrder("
+ "SortField(source_id=19, transform=IdentityTransform(), "
+ "direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), "
+ "SortField(source_id=25, transform=BucketTransform(num_buckets=4), "
+ "direction=SortDirection.DESC, null_order=NullOrder.NULLS_LAST), "
+ "SortField(source_id=22, transform=VoidTransform(), "
+ "direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST), "
+ "order_id=22)"
+ )
assert repr(sort_order) == expected
diff --git a/tests/table/test_upsert.py b/tests/table/test_upsert.py
index 9bc61799..08f90c66 100644
--- a/tests/table/test_upsert.py
+++ b/tests/table/test_upsert.py
@@ -120,7 +120,10 @@ def assert_upsert_result(res: UpsertResult, expected_updated: int, expected_inse
@pytest.mark.parametrize(
- "join_cols, src_start_row, src_end_row, target_start_row, target_end_row,
when_matched_update_all, when_not_matched_insert_all, expected_updated,
expected_inserted",
+ (
+ "join_cols, src_start_row, src_end_row, target_start_row,
target_end_row, "
+ "when_matched_update_all, when_not_matched_insert_all,
expected_updated, expected_inserted"
+ ),
[
(["order_id"], 1, 2, 2, 3, True, True, 1, 1), # single row
(["order_id"], 5001, 15000, 1, 10000, True, True, 5000, 5000), # 10k
rows
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index b366551c..e38bdbd6 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -66,7 +66,8 @@ Notes:
- Stored directly
- 'Z' -> 90
Decimal:
- - Stored as unscaled values in the form of two's-complement big-endian
binary using the minimum number of bytes for the values
+ - Stored as unscaled values in the form of two's-complement big-endian
binary
+ using the minimum number of bytes for the values
- 345 is 0...1|01011001 in binary
- 00000001 -> 1, 01011001 -> 89
Float:
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 0c006879..93ddc162 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -95,7 +95,10 @@ def test_schema_str(table_schema_simple: Schema) -> None:
def test_schema_repr_single_field() -> None:
"""Test schema representation"""
actual = repr(Schema(NestedField(field_id=1, name="foo",
field_type=StringType()), schema_id=1))
- expected = "Schema(NestedField(field_id=1, name='foo',
field_type=StringType(), required=False), schema_id=1, identifier_field_ids=[])"
+ expected = (
+ "Schema(NestedField(field_id=1, name='foo', field_type=StringType(),
required=False), "
+ "schema_id=1, identifier_field_ids=[])"
+ )
assert expected == actual
@@ -108,7 +111,11 @@ def test_schema_repr_two_fields() -> None:
schema_id=1,
)
)
- expected = "Schema(NestedField(field_id=1, name='foo',
field_type=StringType(), required=False), NestedField(field_id=2, name='bar',
field_type=IntegerType(), required=False), schema_id=1,
identifier_field_ids=[])"
+ expected = (
+ "Schema(NestedField(field_id=1, name='foo', field_type=StringType(),
required=False), "
+ "NestedField(field_id=2, name='bar', field_type=IntegerType(),
required=False), "
+ "schema_id=1, identifier_field_ids=[])"
+ )
assert expected == actual
@@ -428,13 +435,29 @@ def test_build_position_accessors_with_struct(table_schema_nested: Schema) -> No
def test_serialize_schema(table_schema_with_full_nested_fields: Schema) ->
None:
actual = table_schema_with_full_nested_fields.model_dump_json()
- expected =
"""{"type":"struct","fields":[{"id":1,"name":"foo","type":"string","required":false,"doc":"foo
doc","initial-default":"foo initial","write-default":"foo
write"},{"id":2,"name":"bar","type":"int","required":true,"doc":"bar
doc","initial-default":42,"write-default":43},{"id":3,"name":"baz","type":"boolean","required":false,"doc":"baz
doc","initial-default":true,"write-default":false}],"schema-id":1,"identifier-field-ids":[2]}"""
+ expected = (
+ '{"type":"struct","fields":['
+ '{"id":1,"name":"foo","type":"string","required":false,"doc":"foo
doc",'
+ '"initial-default":"foo initial","write-default":"foo write"},'
+ '{"id":2,"name":"bar","type":"int","required":true,"doc":"bar doc",'
+ '"initial-default":42,"write-default":43},'
+ '{"id":3,"name":"baz","type":"boolean","required":false,"doc":"baz
doc",'
+ '"initial-default":true,"write-default":false}],'
+ '"schema-id":1,"identifier-field-ids":[2]}'
+ )
assert actual == expected
def test_deserialize_schema(table_schema_with_full_nested_fields: Schema) ->
None:
actual = Schema.model_validate_json(
- """{"type": "struct", "fields": [{"id": 1, "name": "foo", "type":
"string", "required": false, "doc": "foo doc", "initial-default": "foo
initial", "write-default": "foo write"}, {"id": 2, "name": "bar", "type":
"int", "required": true, "doc": "bar doc", "initial-default": 42,
"write-default": 43}, {"id": 3, "name": "baz", "type": "boolean", "required":
false, "doc": "baz doc", "initial-default": true, "write-default": false}],
"schema-id": 1, "identifier-field-ids": [2]}"""
+ '{"type": "struct", "fields": ['
+ '{"id": 1, "name": "foo", "type": "string", "required": false, "doc":
"foo doc", '
+ '"initial-default": "foo initial", "write-default": "foo write"}, '
+ '{"id": 2, "name": "bar", "type": "int", "required": true, "doc": "bar
doc", '
+ '"initial-default": 42, "write-default": 43}, '
+ '{"id": 3, "name": "baz", "type": "boolean", "required": false, "doc":
"baz doc", '
+ '"initial-default": true, "write-default": false}], '
+ '"schema-id": 1, "identifier-field-ids": [2]}'
)
expected = table_schema_with_full_nested_fields
assert actual == expected
@@ -885,22 +908,22 @@ def test_identifier_fields_fails(table_schema_nested_with_struct_key_map: Schema
with pytest.raises(ValueError) as exc_info:
Schema(*table_schema_nested_with_struct_key_map.fields, schema_id=1,
identifier_field_ids=[23])
assert (
- f"Cannot add field zip as an identifier field: must not be nested in
{table_schema_nested_with_struct_key_map.find_field('location')}"
- in str(exc_info.value)
+ f"Cannot add field zip as an identifier field: must not be nested in "
+ f"{table_schema_nested_with_struct_key_map.find_field('location')}" in
str(exc_info.value)
)
with pytest.raises(ValueError) as exc_info:
Schema(*table_schema_nested_with_struct_key_map.fields, schema_id=1,
identifier_field_ids=[26])
assert (
- f"Cannot add field x as an identifier field: must not be nested in
{table_schema_nested_with_struct_key_map.find_field('points')}"
- in str(exc_info.value)
+ f"Cannot add field x as an identifier field: must not be nested in "
+ f"{table_schema_nested_with_struct_key_map.find_field('points')}" in
str(exc_info.value)
)
with pytest.raises(ValueError) as exc_info:
Schema(*table_schema_nested_with_struct_key_map.fields, schema_id=1,
identifier_field_ids=[17])
assert (
- f"Cannot add field age as an identifier field: must not be nested in
an optional field
{table_schema_nested_with_struct_key_map.find_field('person')}"
- in str(exc_info.value)
+ f"Cannot add field age as an identifier field: must not be nested in
an optional field "
+ f"{table_schema_nested_with_struct_key_map.find_field('person')}" in
str(exc_info.value)
)
diff --git a/tests/test_types.py b/tests/test_types.py
index 707deb16..64f3eb95 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -533,9 +533,9 @@ def test_repr_nested_field_default_nones_should_not_appear() -> None:
repr(NestedField(1, "required_field", StringType(), required=False,
initial_default="hello", write_default=None))
== "NestedField(field_id=1, name='required_field',
field_type=StringType(), required=False, initial_default='hello')"
)
- assert (
- repr(NestedField(1, "required_field", StringType(), required=False,
initial_default="hello", write_default="bye"))
- == "NestedField(field_id=1, name='required_field',
field_type=StringType(), required=False, initial_default='hello',
write_default='bye')"
+ assert repr(NestedField(1, "required_field", StringType(), required=False,
initial_default="hello", write_default="bye")) == (
+ "NestedField(field_id=1, name='required_field',
field_type=StringType(), required=False, "
+ "initial_default='hello', write_default='bye')"
)
@@ -633,9 +633,9 @@ def test_str_struct(simple_struct: StructType) -> None:
def test_repr_struct(simple_struct: StructType) -> None:
- assert (
- repr(simple_struct)
- == "StructType(fields=(NestedField(field_id=1, name='required_field',
field_type=StringType(), required=True), NestedField(field_id=2,
name='optional_field', field_type=IntegerType(), required=False),))"
+ assert repr(simple_struct) == (
+ "StructType(fields=(NestedField(field_id=1, name='required_field',
field_type=StringType(), required=True), "
+ "NestedField(field_id=2, name='optional_field',
field_type=IntegerType(), required=False),))"
)
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index d12019c9..abd9878e 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -79,9 +79,9 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
data_file = manifest_entry.data_file
assert data_file.content == DataFileContent.DATA
- assert (
- data_file.file_path
- ==
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
+ assert data_file.file_path == (
+ "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/"
+ "00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
)
assert data_file.file_format == FileFormat.PARQUET
assert repr(data_file.partition) == "Record[1, 1925]"
@@ -396,7 +396,10 @@ def test_write_manifest(
expected_metadata = {
"schema": test_schema.model_dump_json(),
- "partition-spec":
"""[{"source-id":1,"field-id":1000,"transform":"identity","name":"VendorID"},{"source-id":2,"field-id":1001,"transform":"day","name":"tpep_pickup_day"}]""",
+ "partition-spec": (
+
'[{"source-id":1,"field-id":1000,"transform":"identity","name":"VendorID"},'
+
'{"source-id":2,"field-id":1001,"transform":"day","name":"tpep_pickup_day"}]'
+ ),
"partition-spec-id": str(demo_manifest_file.partition_spec_id),
"format-version": str(format_version),
}
@@ -416,9 +419,9 @@ def test_write_manifest(
data_file = manifest_entry.data_file
assert data_file.content == DataFileContent.DATA
- assert (
- data_file.file_path
- ==
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
+ assert data_file.file_path == (
+ "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/"
+ "00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
)
assert data_file.file_format == FileFormat.PARQUET
assert data_file.partition == Record(1, 1925)