[iceberg] branch master updated: Python: Update pre-commit to the latest version (#7436)

fokko Mon, 01 May 2023 14:39:00 -0700

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git



The following commit(s) were added to refs/heads/master by this push:
     new 4ce5318e5c Python: Update pre-commit to the latest version (#7436)
4ce5318e5c is described below

commit 4ce5318e5cfae71622c7e980ae660f7a66f443de
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon May 1 23:38:50 2023 +0200

    Python: Update pre-commit to the latest version (#7436)
    
    Removed some ignores and removed some list materialization
---
 python/.pre-commit-config.yaml           | 15 +++++++-------
 python/pyiceberg/conversions.py          |  6 +++---
 python/pyiceberg/expressions/__init__.py |  2 +-
 python/pyiceberg/expressions/literals.py | 15 +++++++-------
 python/pyiceberg/expressions/visitors.py | 34 ++++++++++++++++----------------
 python/pyiceberg/schema.py               |  2 +-
 python/pyiceberg/typedef.py              |  2 +-
 python/pyiceberg/utils/decimal.py        |  2 +-
 python/tests/test_transforms.py          |  8 +++-----
 python/tests/utils/test_bin_packing.py   |  2 +-
 10 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml
index 2abac29d27..1c4de6121e 100644
--- a/python/.pre-commit-config.yaml
+++ b/python/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
       - id: check-yaml
       - id: check-ast
   - repo: https://github.com/ambv/black
-    rev: 23.1.0
+    rev: 23.3.0
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/mirrors-isort
@@ -38,13 +38,13 @@ repos:
       - id: isort
         args: [--settings-path=python/pyproject.toml]
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.991
+    rev: v1.2.0
     hooks:
       - id: mypy
         args:
           [--install-types, --non-interactive, --config=python/pyproject.toml]
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.7.1
+    rev: v3.0.0-alpha.9-for-vscode
     hooks:
       - id: prettier
         args: [--plugin=prettier-plugin-toml]
@@ -57,12 +57,12 @@ repos:
       - id: pycln
         args: [--config=python/pyproject.toml]
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.3.2
     hooks:
       - id: pyupgrade
         args: [--py38-plus, --keep-runtime-typing]
   - repo: https://github.com/pycqa/pylint
-    rev: v2.16.0
+    rev: v3.0.0a6
     hooks:
       - id: pylint
         args: [--rcfile=python/pylintrc]
@@ -70,9 +70,10 @@ repos:
     rev: "6.0.0"
     hooks:
       - id: flake8
-        args: ["--ignore=E501,W503,E203,B024"]
+        args: ["--ignore=E501,W503,E203,B024,B028"]
         additional_dependencies:
-          [flake8-bugbear==22.12.6, flake8-comprehensions==3.10.1]
+          - flake8-bugbear==23.3.23
+          - flake8-comprehensions==3.12.0
   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.16
     hooks:
diff --git a/python/pyiceberg/conversions.py b/python/pyiceberg/conversions.py
index 37751a0280..075db4be6d 100644
--- a/python/pyiceberg/conversions.py
+++ b/python/pyiceberg/conversions.py
@@ -230,9 +230,9 @@ def _(primitive_type: DecimalType, value: Decimal) -> bytes:
         bytes: The byte representation of `value`
     """
     _, digits, exponent = value.as_tuple()
-
-    if -exponent != primitive_type.scale:
-        raise ValueError(f"Cannot serialize value, scale of value does not match type {primitive_type}: {-exponent}")
+    exponent = abs(int(exponent))
+    if exponent != primitive_type.scale:
+        raise ValueError(f"Cannot serialize value, scale of value does not match type {primitive_type}: {exponent}")
     elif len(digits) > primitive_type.precision:
         raise ValueError(
             f"Cannot serialize value, precision of value is greater than precision of type {primitive_type}: {len(digits)}"
diff --git a/python/pyiceberg/expressions/__init__.py b/python/pyiceberg/expressions/__init__.py
index 6780c6dca4..5be291b0e3 100644
--- a/python/pyiceberg/expressions/__init__.py
+++ b/python/pyiceberg/expressions/__init__.py
@@ -589,7 +589,7 @@ class NotIn(SetPredicate[L], ABC):
         if count == 0:
             return AlwaysTrue()
         elif count == 1:
-            return NotEqualTo(term, next(iter(literals_set)))  # type: ignore
+            return NotEqualTo(term, next(iter(literals_set)))
         else:
             return super().__new__(cls)
 
diff --git a/python/pyiceberg/expressions/literals.py b/python/pyiceberg/expressions/literals.py
index 9aa43531a3..8414ce533e 100644
--- a/python/pyiceberg/expressions/literals.py
+++ b/python/pyiceberg/expressions/literals.py
@@ -122,7 +122,7 @@ def literal(value: L) -> Literal[L]:
         LongLiteral(123)
     """
     if isinstance(value, float):
-        return DoubleLiteral(value)
+        return DoubleLiteral(value)  # type: ignore
     elif isinstance(value, bool):
         return BooleanLiteral(value)
     elif isinstance(value, int):
@@ -437,12 +437,12 @@ class DecimalLiteral(Literal[Decimal]):
         super().__init__(value, Decimal)
 
     def increment(self) -> Literal[Decimal]:
-        original_scale = abs(self.value.as_tuple().exponent)
+        original_scale = abs(int(self.value.as_tuple().exponent))
         unscaled = decimal_to_unscaled(self.value)
         return DecimalLiteral(unscaled_to_decimal(unscaled + 1, original_scale))
 
     def decrement(self) -> Literal[Decimal]:
-        original_scale = abs(self.value.as_tuple().exponent)
+        original_scale = abs(int(self.value.as_tuple().exponent))
         unscaled = decimal_to_unscaled(self.value)
         return DecimalLiteral(unscaled_to_decimal(unscaled - 1, original_scale))
 
@@ -452,7 +452,7 @@ class DecimalLiteral(Literal[Decimal]):
 
     @to.register(DecimalType)
     def _(self, type_var: DecimalType) -> Literal[Decimal]:
-        if type_var.scale == abs(self.value.as_tuple().exponent):
+        if type_var.scale == abs(int(self.value.as_tuple().exponent)):
             return self
         raise ValueError(f"Could not convert {self.value} into a {type_var}")
 
@@ -558,12 +558,11 @@ class StringLiteral(Literal[str]):
     @to.register(DecimalType)
     def _(self, type_var: DecimalType) -> Literal[Decimal]:
         dec = Decimal(self.value)
-        if type_var.scale == abs(dec.as_tuple().exponent):
+        scale = abs(int(dec.as_tuple().exponent))
+        if type_var.scale == scale:
             return DecimalLiteral(dec)
         else:
-            raise ValueError(
-                f"Could not convert {self.value} into a {type_var}, scales differ {type_var.scale} <> {abs(dec.as_tuple().exponent)}"
-            )
+            raise ValueError(f"Could not convert {self.value} into a {type_var}, scales differ {type_var.scale} <> {scale}")
 
     @to.register(BooleanType)
     def _(self, type_var: BooleanType) -> Literal[bool]:
diff --git a/python/pyiceberg/expressions/visitors.py b/python/pyiceberg/expressions/visitors.py
index 33274c7903..4de1ce4236 100644
--- a/python/pyiceberg/expressions/visitors.py
+++ b/python/pyiceberg/expressions/visitors.py
@@ -1197,8 +1197,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if lower_bound_bytes := self.lower_bounds.get(field_id):  # type: ignore
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)  # type: ignore
+        if lower_bound_bytes := self.lower_bounds.get(field_id):
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
 
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1219,8 +1219,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if lower_bound_bytes := self.lower_bounds.get(field_id):  # type: ignore
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)  # type: ignore
+        if lower_bound_bytes := self.lower_bounds.get(field_id):
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1240,8 +1240,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if upper_bound_bytes := self.upper_bounds.get(field_id):  # type: ignore
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)  # type: ignore
+        if upper_bound_bytes := self.upper_bounds.get(field_id):
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if upper_bound <= literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1261,8 +1261,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if upper_bound_bytes := self.upper_bounds.get(field_id):  # type: ignore
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)  # type: ignore
+        if upper_bound_bytes := self.upper_bounds.get(field_id):
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if upper_bound < literal.value:
                 if self._is_nan(upper_bound):
                     # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
@@ -1282,8 +1282,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if lower_bound_bytes := self.lower_bounds.get(field_id):  # type: ignore
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)  # type: ignore
+        if lower_bound_bytes := self.lower_bounds.get(field_id):
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1291,8 +1291,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
             if lower_bound > literal.value:
                 return ROWS_CANNOT_MATCH
 
-        if upper_bound_bytes := self.upper_bounds.get(field_id):  # type: ignore
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)  # type: ignore
+        if upper_bound_bytes := self.upper_bounds.get(field_id):
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             if self._is_nan(upper_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1319,8 +1319,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
         if not isinstance(field.field_type, PrimitiveType):
             raise ValueError(f"Expected PrimitiveType: {field.field_type}")
 
-        if lower_bound_bytes := self.lower_bounds.get(field_id):  # type: ignore
-            lower_bound = from_bytes(field.field_type, lower_bound_bytes)  # type: ignore
+        if lower_bound_bytes := self.lower_bounds.get(field_id):
+            lower_bound = from_bytes(field.field_type, lower_bound_bytes)
             if self._is_nan(lower_bound):
                 # NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
                 return ROWS_MIGHT_MATCH
@@ -1329,8 +1329,8 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
             if len(literals) == 0:
                 return ROWS_CANNOT_MATCH
 
-        if upper_bound_bytes := self.upper_bounds.get(field_id):  # type: ignore
-            upper_bound = from_bytes(field.field_type, upper_bound_bytes)  # type: ignore
+        if upper_bound_bytes := self.upper_bounds.get(field_id):
+            upper_bound = from_bytes(field.field_type, upper_bound_bytes)
             # this is different from Java, here NaN is always larger
             if self._is_nan(upper_bound):
                 return ROWS_MIGHT_MATCH
@@ -1367,7 +1367,7 @@ class _InclusiveMetricsEvaluator(BoundBooleanExpressionVisitor[bool]):
                 return ROWS_CANNOT_MATCH
 
         if upper_bound_bytes := self.upper_bounds.get(field_id):
-            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))  # type: ignore
+            upper_bound = str(from_bytes(field.field_type, upper_bound_bytes))
 
             # truncate upper bound so that its length is not greater than the length of prefix
             if upper_bound is not None and upper_bound[:len_prefix] < prefix:
diff --git a/python/pyiceberg/schema.py b/python/pyiceberg/schema.py
index ae1f532b68..7c99d458f3 100644
--- a/python/pyiceberg/schema.py
+++ b/python/pyiceberg/schema.py
@@ -107,7 +107,7 @@ class Schema(IcebergBaseModel):
             return False
 
         identifier_field_ids_is_equal = self.identifier_field_ids == other.identifier_field_ids
-        schema_is_equal = all([lhs == rhs for lhs, rhs in zip(self.columns, other.columns)])
+        schema_is_equal = all(lhs == rhs for lhs, rhs in zip(self.columns, other.columns))
 
         return identifier_field_ids_is_equal and schema_is_equal
 
diff --git a/python/pyiceberg/typedef.py b/python/pyiceberg/typedef.py
index bdf467101c..23d3217a9c 100644
--- a/python/pyiceberg/typedef.py
+++ b/python/pyiceberg/typedef.py
@@ -132,7 +132,7 @@ class Record(StructProtocol):
             self._position_to_field_name = {idx: field.name for idx, field in enumerate(struct.fields)}
         elif named_data:
             # Order of named_data is preserved (PEP 468) so this can be used to generate the position dict
-            self._position_to_field_name = {idx: name for idx, name in enumerate(named_data.keys())}
+            self._position_to_field_name = dict(enumerate(named_data.keys()))
         else:
             self._position_to_field_name = {idx: f"field{idx + 1}" for idx in range(len(data))}
 
diff --git a/python/pyiceberg/utils/decimal.py b/python/pyiceberg/utils/decimal.py
index 40bc087390..fb58034e68 100644
--- a/python/pyiceberg/utils/decimal.py
+++ b/python/pyiceberg/utils/decimal.py
@@ -87,4 +87,4 @@ def truncate_decimal(value: Decimal, width: int) -> Decimal:
     """
     unscaled_value = decimal_to_unscaled(value)
     applied_value = unscaled_value - (((unscaled_value % width) + width) % width)
-    return unscaled_to_decimal(applied_value, -value.as_tuple().exponent)
+    return unscaled_to_decimal(applied_value, abs(int(value.as_tuple().exponent)))
diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py
index ba03393dd8..201dbbbd6d 100644
--- a/python/tests/test_transforms.py
+++ b/python/tests/test_transforms.py
@@ -772,9 +772,7 @@ def test_projection_identity_unary(bound_reference_timestamp: BoundReference[int
 def test_projection_identity_literal(bound_reference_timestamp: BoundReference[int]) -> None:
     assert IdentityTransform().project(
         "name", BoundEqualTo(term=bound_reference_timestamp, literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
-    ) == EqualTo(
-        term="name", literal=TimestampLiteral(TIMESTAMP_EXAMPLE)  # type: ignore
-    )
+    ) == EqualTo(term="name", literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
 
 
 def test_projection_identity_set_in(bound_reference_timestamp: BoundReference[int]) -> None:
@@ -786,7 +784,7 @@ def test_projection_identity_set_in(bound_reference_timestamp: BoundReference[in
         ),
     ) == In(
         term="name",
-        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},  # type: ignore
+        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
     )
 
 
@@ -799,7 +797,7 @@ def test_projection_identity_set_not_in(bound_reference_timestamp: BoundReferenc
         ),
     ) == NotIn(
         term="name",
-        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},  # type: ignore
+        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
     )
 
 
diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py
index c021c94d25..054ea79556 100644
--- a/python/tests/utils/test_bin_packing.py
+++ b/python/tests/utils/test_bin_packing.py
@@ -41,7 +41,7 @@ def test_bin_packing(splits: List[int], lookback: int, split_size: int, open_cos
         return max(x, open_cost)
 
     item_list_sums: List[int] = [sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func)]
-    assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums])
+    assert all(split_size >= item_sum >= 0 for item_sum in item_list_sums)
 
 
 @pytest.mark.parametrize(

[iceberg] branch master updated: Python: Update pre-commit to the latest version (#7436)

Reply via email to