Fokko commented on code in PR #5362:
URL: https://github.com/apache/iceberg/pull/5362#discussion_r930724220


##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> 
BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no 
Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull
 
 
-class BoundIsNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNaN:
-        return BoundNotNaN(self.term)
+class IsNaN(UnaryPredicate[T]):
+    pass
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no 
Literals.")
 
+class NotNaN(UnaryPredicate[T]):
+    pass
 
-class NotNaN(UnboundPredicate[T]):
-    def __invert__(self) -> IsNaN:
-        return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no 
Literals.")
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():

Review Comment:
   This way we don't have to fetch an actual `{Float,Double}Type` instance just to compare against it.
   ```suggestion
           if type(bound_type) in {FloatType, DoubleType}:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> 
BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no 
Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull

Review Comment:
   I like that we only store the types. I'm unsure whether reimplementing it would 
cause issues. I would probably override the `__invert__` method in 
`UnaryPredicate`:
   ```python
       @abstractmethod
       def __invert__(self) -> BooleanExpression:
           pass
   ```
   The return type would then change to `BooleanExpression`, as it can return a 
`UnaryPredicate` but also an `Always{True,False}`. This is nice because it 
gives us typing and we don't flatten everything to `type`.
   
   And then implement `__invert__` in each concrete class itself. This keeps everything in one 
place, and we make sure that we implement all the methods:
   
   A missing implementation is caught by pytest:
   ```python
       def test_isnull_bind_required():
           schema = Schema(NestedField(2, "a", IntegerType(), required=True), 
schema_id=1)
   >       assert base.IsNull(base.Reference("a")).bind(schema) == 
base.AlwaysFalse()
   E       TypeError: Can't instantiate abstract class IsNull with abstract 
method __invert__
   ```
   
   Also by static analysis:
   ```python
   python/pyiceberg/expressions/base.py:393: error: Cannot instantiate abstract 
class "NotNull" with abstract attribute "__invert__"
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> 
BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no 
Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no 
Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no 
Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull
 
 
-class BoundIsNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNaN:
-        return BoundNotNaN(self.term)
+class IsNaN(UnaryPredicate[T]):
+    pass
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no 
Literals.")
 
+class NotNaN(UnaryPredicate[T]):
+    pass
 
-class NotNaN(UnboundPredicate[T]):
-    def __invert__(self) -> IsNaN:
-        return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no 
Literals.")
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
 
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():

Review Comment:
   ```suggestion
           if type(bound_type) in {FloatType, DoubleType}:
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to