This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 4ce5318e5c Python: Update pre-commit to the latest version (#7436)
4ce5318e5c is described below
commit 4ce5318e5cfae71622c7e980ae660f7a66f443de
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon May 1 23:38:50 2023 +0200
Python: Update pre-commit to the latest version (#7436)
Removed some `# type: ignore` comments and some unnecessary list materializations.
---
python/.pre-commit-config.yaml | 15 +++++++-------
python/pyiceberg/conversions.py | 6 +++---
python/pyiceberg/expressions/__init__.py | 2 +-
python/pyiceberg/expressions/literals.py | 15 +++++++-------
python/pyiceberg/expressions/visitors.py | 34 ++++++++++++++++----------------
python/pyiceberg/schema.py | 2 +-
python/pyiceberg/typedef.py | 2 +-
python/pyiceberg/utils/decimal.py | 2 +-
python/tests/test_transforms.py | 8 +++-----
python/tests/utils/test_bin_packing.py | 2 +-
10 files changed, 43 insertions(+), 45 deletions(-)
diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml
index 2abac29d27..1c4de6121e 100644
--- a/python/.pre-commit-config.yaml
+++ b/python/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
- id: check-yaml
- id: check-ast
- repo: https://github.com/ambv/black
- rev: 23.1.0
+ rev: 23.3.0
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-isort
@@ -38,13 +38,13 @@ repos:
- id: isort
args: [--settings-path=python/pyproject.toml]
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v0.991
+ rev: v1.2.0
hooks:
- id: mypy
args:
[--install-types, --non-interactive, --config=python/pyproject.toml]
- repo: https://github.com/pre-commit/mirrors-prettier
- rev: v2.7.1
+ rev: v3.0.0-alpha.9-for-vscode
hooks:
- id: prettier
args: [--plugin=prettier-plugin-toml]
@@ -57,12 +57,12 @@ repos:
- id: pycln
args: [--config=python/pyproject.toml]
- repo: https://github.com/asottile/pyupgrade
- rev: v3.3.1
+ rev: v3.3.2
hooks:
- id: pyupgrade
args: [--py38-plus, --keep-runtime-typing]
- repo: https://github.com/pycqa/pylint
- rev: v2.16.0
+ rev: v3.0.0a6
hooks:
- id: pylint
args: [--rcfile=python/pylintrc]
@@ -70,9 +70,10 @@ repos:
rev: "6.0.0"
hooks:
- id: flake8
- args: ["--ignore=E501,W503,E203,B024"]
+ args: ["--ignore=E501,W503,E203,B024,B028"]
additional_dependencies:
- [flake8-bugbear==22.12.6, flake8-comprehensions==3.10.1]
+ - flake8-bugbear==23.3.23
+ - flake8-comprehensions==3.12.0
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.16
hooks:
diff --git a/python/pyiceberg/conversions.py b/python/pyiceberg/conversions.py
index 37751a0280..075db4be6d 100644
--- a/python/pyiceberg/conversions.py
+++ b/python/pyiceberg/conversions.py
@@ -230,9 +230,9 @@ def _(primitive_type: DecimalType, value: Decimal) -> bytes:
bytes: The byte representation of `value`
"""
_, digits, exponent = value.as_tuple()
-
- if -exponent != primitive_type.scale:
- raise ValueError(f"Cannot serialize value, scale of value does not
match type {primitive_type}: {-exponent}")
+ exponent = abs(int(exponent))
+ if exponent != primitive_type.scale:
+ raise ValueError(f"Cannot serialize value, scale of value does not
match type {primitive_type}: {exponent}")
elif len(digits) > primitive_type.precision:
raise ValueError(
f"Cannot serialize value, precision of value is greater than
precision of type {primitive_type}: {len(digits)}"
diff --git a/python/pyiceberg/expressions/__init__.py
b/python/pyiceberg/expressions/__init__.py
index 6780c6dca4..5be291b0e3 100644
--- a/python/pyiceberg/expressions/__init__.py
+++ b/python/pyiceberg/expressions/__init__.py
@@ -589,7 +589,7 @@ class NotIn(SetPredicate[L], ABC):
if count == 0:
return AlwaysTrue()
elif count == 1:
- return NotEqualTo(term, next(iter(literals_set))) # type: ignore
+ return NotEqualTo(term, next(iter(literals_set)))
else:
return super().__new__(cls)
diff --git a/python/pyiceberg/expressions/literals.py
b/python/pyiceberg/expressions/literals.py
index 9aa43531a3..8414ce533e 100644
--- a/python/pyiceberg/expressions/literals.py
+++ b/python/pyiceberg/expressions/literals.py
@@ -122,7 +122,7 @@ def literal(value: L) -> Literal[L]:
LongLiteral(123)
"""
if isinstance(value, float):
- return DoubleLiteral(value)
+ return DoubleLiteral(value) # type: ignore
elif isinstance(value, bool):
return BooleanLiteral(value)
elif isinstance(value, int):
@@ -437,12 +437,12 @@ class DecimalLiteral(Literal[Decimal]):
super().__init__(value, Decimal)
def increment(self) -> Literal[Decimal]:
- original_scale = abs(self.value.as_tuple().exponent)
+ original_scale = abs(int(self.value.as_tuple().exponent))
unscaled = decimal_to_unscaled(self.value)
return DecimalLiteral(unscaled_to_decimal(unscaled + 1,
original_scale))
def decrement(self) -> Literal[Decimal]:
- original_scale = abs(self.value.as_tuple().exponent)
+ original_scale = abs(int(self.value.as_tuple().exponent))
unscaled = decimal_to_unscaled(self.value)
return DecimalLiteral(unscaled_to_decimal(unscaled - 1,
original_scale))
@@ -452,7 +452,7 @@ class DecimalLiteral(Literal[Decimal]):
@to.register(DecimalType)
def _(self, type_var: DecimalType) -> Literal[Decimal]:
- if type_var.scale == abs(self.value.as_tuple().exponent):
+ if type_var.scale == abs(int(self.value.as_tuple().exponent)):
return self
raise ValueError(f"Could not convert {self.value} into a {type_var}")
@@ -558,12 +558,11 @@ class StringLiteral(Literal[str]):
@to.register(DecimalType)
def _(self, type_var: DecimalType) -> Literal[Decimal]:
dec = Decimal(self.value)
- if type_var.scale == abs(dec.as_tuple().exponent):
+ scale = abs(int(dec.as_tuple().exponent))
+ if type_var.scale == scale:
return DecimalLiteral(dec)
else:
- raise ValueError(
- f"Could not convert {self.value} into a {type_var}, scales
differ {type_var.scale} <> {abs(dec.as_tuple().exponent)}"
- )
+ raise ValueError(f"Could not convert {self.value} into a
{type_var}, scales differ {type_var.scale} <> {scale}")
@to.register(BooleanType)
def _(self, type_var: BooleanType) -> Literal[bool]:
diff --git a/python/pyiceberg/expressions/visitors.py
b/python/pyiceberg/expressions/visitors.py
index 33274c7903..4de1ce4236 100644
--- a/python/pyiceberg/expressions/visitors.py
+++ b/python/pyiceberg/expressions/visitors.py
@@ -1197,8 +1197,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if lower_bound_bytes := self.lower_bounds.get(field_id): # type:
ignore
- lower_bound = from_bytes(field.field_type, lower_bound_bytes) #
type: ignore
+ if lower_bound_bytes := self.lower_bounds.get(field_id):
+ lower_bound = from_bytes(field.field_type, lower_bound_bytes)
if self._is_nan(lower_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
@@ -1219,8 +1219,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if lower_bound_bytes := self.lower_bounds.get(field_id): # type:
ignore
- lower_bound = from_bytes(field.field_type, lower_bound_bytes) #
type: ignore
+ if lower_bound_bytes := self.lower_bounds.get(field_id):
+ lower_bound = from_bytes(field.field_type, lower_bound_bytes)
if self._is_nan(lower_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1240,8 +1240,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if upper_bound_bytes := self.upper_bounds.get(field_id): # type:
ignore
- upper_bound = from_bytes(field.field_type, upper_bound_bytes) #
type: ignore
+ if upper_bound_bytes := self.upper_bounds.get(field_id):
+ upper_bound = from_bytes(field.field_type, upper_bound_bytes)
if upper_bound <= literal.value:
if self._is_nan(upper_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
@@ -1261,8 +1261,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if upper_bound_bytes := self.upper_bounds.get(field_id): # type:
ignore
- upper_bound = from_bytes(field.field_type, upper_bound_bytes) #
type: ignore
+ if upper_bound_bytes := self.upper_bounds.get(field_id):
+ upper_bound = from_bytes(field.field_type, upper_bound_bytes)
if upper_bound < literal.value:
if self._is_nan(upper_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
@@ -1282,8 +1282,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if lower_bound_bytes := self.lower_bounds.get(field_id): # type:
ignore
- lower_bound = from_bytes(field.field_type, lower_bound_bytes) #
type: ignore
+ if lower_bound_bytes := self.lower_bounds.get(field_id):
+ lower_bound = from_bytes(field.field_type, lower_bound_bytes)
if self._is_nan(lower_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1291,8 +1291,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if lower_bound > literal.value:
return ROWS_CANNOT_MATCH
- if upper_bound_bytes := self.upper_bounds.get(field_id): # type:
ignore
- upper_bound = from_bytes(field.field_type, upper_bound_bytes) #
type: ignore
+ if upper_bound_bytes := self.upper_bounds.get(field_id):
+ upper_bound = from_bytes(field.field_type, upper_bound_bytes)
if self._is_nan(upper_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1319,8 +1319,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if not isinstance(field.field_type, PrimitiveType):
raise ValueError(f"Expected PrimitiveType: {field.field_type}")
- if lower_bound_bytes := self.lower_bounds.get(field_id): # type:
ignore
- lower_bound = from_bytes(field.field_type, lower_bound_bytes) #
type: ignore
+ if lower_bound_bytes := self.lower_bounds.get(field_id):
+ lower_bound = from_bytes(field.field_type, lower_bound_bytes)
if self._is_nan(lower_bound):
# NaN indicates unreliable bounds. See the
InclusiveMetricsEvaluator docs for more.
return ROWS_MIGHT_MATCH
@@ -1329,8 +1329,8 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
if len(literals) == 0:
return ROWS_CANNOT_MATCH
- if upper_bound_bytes := self.upper_bounds.get(field_id): # type:
ignore
- upper_bound = from_bytes(field.field_type, upper_bound_bytes) #
type: ignore
+ if upper_bound_bytes := self.upper_bounds.get(field_id):
+ upper_bound = from_bytes(field.field_type, upper_bound_bytes)
# this is different from Java, here NaN is always larger
if self._is_nan(upper_bound):
return ROWS_MIGHT_MATCH
@@ -1367,7 +1367,7 @@ class
_InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
return ROWS_CANNOT_MATCH
if upper_bound_bytes := self.upper_bounds.get(field_id):
- upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
# type: ignore
+ upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
# truncate upper bound so that its length is not greater than the
length of prefix
if upper_bound is not None and upper_bound[:len_prefix] < prefix:
diff --git a/python/pyiceberg/schema.py b/python/pyiceberg/schema.py
index ae1f532b68..7c99d458f3 100644
--- a/python/pyiceberg/schema.py
+++ b/python/pyiceberg/schema.py
@@ -107,7 +107,7 @@ class Schema(IcebergBaseModel):
return False
identifier_field_ids_is_equal = self.identifier_field_ids ==
other.identifier_field_ids
- schema_is_equal = all([lhs == rhs for lhs, rhs in zip(self.columns,
other.columns)])
+ schema_is_equal = all(lhs == rhs for lhs, rhs in zip(self.columns,
other.columns))
return identifier_field_ids_is_equal and schema_is_equal
diff --git a/python/pyiceberg/typedef.py b/python/pyiceberg/typedef.py
index bdf467101c..23d3217a9c 100644
--- a/python/pyiceberg/typedef.py
+++ b/python/pyiceberg/typedef.py
@@ -132,7 +132,7 @@ class Record(StructProtocol):
self._position_to_field_name = {idx: field.name for idx, field in
enumerate(struct.fields)}
elif named_data:
# Order of named_data is preserved (PEP 468) so this can be used
to generate the position dict
- self._position_to_field_name = {idx: name for idx, name in
enumerate(named_data.keys())}
+ self._position_to_field_name = dict(enumerate(named_data.keys()))
else:
self._position_to_field_name = {idx: f"field{idx + 1}" for idx in
range(len(data))}
diff --git a/python/pyiceberg/utils/decimal.py
b/python/pyiceberg/utils/decimal.py
index 40bc087390..fb58034e68 100644
--- a/python/pyiceberg/utils/decimal.py
+++ b/python/pyiceberg/utils/decimal.py
@@ -87,4 +87,4 @@ def truncate_decimal(value: Decimal, width: int) -> Decimal:
"""
unscaled_value = decimal_to_unscaled(value)
applied_value = unscaled_value - (((unscaled_value % width) + width) %
width)
- return unscaled_to_decimal(applied_value, -value.as_tuple().exponent)
+ return unscaled_to_decimal(applied_value,
abs(int(value.as_tuple().exponent)))
diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py
index ba03393dd8..201dbbbd6d 100644
--- a/python/tests/test_transforms.py
+++ b/python/tests/test_transforms.py
@@ -772,9 +772,7 @@ def
test_projection_identity_unary(bound_reference_timestamp: BoundReference[int
def test_projection_identity_literal(bound_reference_timestamp:
BoundReference[int]) -> None:
assert IdentityTransform().project(
"name", BoundEqualTo(term=bound_reference_timestamp,
literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
- ) == EqualTo(
- term="name", literal=TimestampLiteral(TIMESTAMP_EXAMPLE) # type:
ignore
- )
+ ) == EqualTo(term="name", literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
def test_projection_identity_set_in(bound_reference_timestamp:
BoundReference[int]) -> None:
@@ -786,7 +784,7 @@ def
test_projection_identity_set_in(bound_reference_timestamp: BoundReference[in
),
) == In(
term="name",
- literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
TimestampLiteral(TIMESTAMP_EXAMPLE)}, # type: ignore
+ literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
TimestampLiteral(TIMESTAMP_EXAMPLE)},
)
@@ -799,7 +797,7 @@ def
test_projection_identity_set_not_in(bound_reference_timestamp: BoundReferenc
),
) == NotIn(
term="name",
- literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
TimestampLiteral(TIMESTAMP_EXAMPLE)}, # type: ignore
+ literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
TimestampLiteral(TIMESTAMP_EXAMPLE)},
)
diff --git a/python/tests/utils/test_bin_packing.py
b/python/tests/utils/test_bin_packing.py
index c021c94d25..054ea79556 100644
--- a/python/tests/utils/test_bin_packing.py
+++ b/python/tests/utils/test_bin_packing.py
@@ -41,7 +41,7 @@ def test_bin_packing(splits: List[int], lookback: int,
split_size: int, open_cos
return max(x, open_cost)
item_list_sums: List[int] = [sum(item) for item in PackingIterator(splits,
split_size, lookback, weight_func)]
- assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums])
+ assert all(split_size >= item_sum >= 0 for item_sum in item_list_sums)
@pytest.mark.parametrize(