rdblue commented on code in PR #4816:
URL: https://github.com/apache/iceberg/pull/4816#discussion_r917458677
##########
python/pyiceberg/expressions/base.py:
##########
@@ -131,11 +75,115 @@ def __ge__(self, other):
class BooleanExpression(ABC):
- """base class for all boolean expressions"""
+ """Represents a boolean expression tree."""
@abstractmethod
def __invert__(self) -> "BooleanExpression":
- ...
+ """Transform the Expression into its negated version."""
+
+
+class Bound(Generic[T], ABC):
+ """Represents a bound value expression."""
+
+ def eval(self, struct: StructProtocol): # pylint: disable=W0613
+ ... # pragma: no cover
+
+
+class Unbound(Generic[T, B], ABC):
+ """Represents an unbound expression node."""
+
+ @abstractmethod
+ def bind(self, schema: Schema, case_sensitive: bool) -> B:
+ ... # pragma: no cover
+
+
+class Term(ABC):
+ """An expression that evaluates to a value."""
+
+
+class BaseReference(Generic[T], Term, ABC):
+ """Represents a variable reference in an expression."""
+
+
+class BoundTerm(Bound[T], Term):
+ """Represents a bound term."""
+
+
+class UnboundTerm(Unbound[T, BoundTerm[T]], Term):
+ """Represents an unbound term."""
+
+
+@dataclass(frozen=True)
+class BoundReference(BoundTerm[T], BaseReference[T]):
+ """A reference bound to a field in a schema
+
+ Args:
+ field (NestedField): A referenced field in an Iceberg schema
+ accessor (Accessor): An Accessor object to access the value at the field's position
+ """
+
+ field: NestedField
+ accessor: Accessor
+
+ def eval(self, struct: StructProtocol) -> Any:
+ """Returns the value at the referenced field's position in an object that abides by the StructProtocol
+ Args:
+ struct (StructProtocol): A row object that abides by the StructProtocol and returns values given a position
+ Returns:
+ Any: The value at the referenced field's position in `struct`
+ """
+ return self.accessor.get(struct)
+
+
+@dataclass(frozen=True)
+class Reference(UnboundTerm[T], BaseReference[T]):
+ """A reference not yet bound to a field in a schema
+
+ Args:
+ name (str): The name of the field
+
+ Note:
+ An unbound reference is sometimes referred to as a "named" reference
+ """
+
+ name: str
+
+ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
+ """Bind the reference to an Iceberg schema
+
+ Args:
+ schema (Schema): An Iceberg schema
+ case_sensitive (bool): Whether to consider case when binding the reference to the field
+
+ Raises:
+ ValueError: If an empty name is provided
+
+ Returns:
+ BoundReference: A reference bound to the specific field in the Iceberg schema
+ """
+ field = schema.find_field(name_or_id=self.name, case_sensitive=case_sensitive)
+
+ if not field:
+ raise ValueError(f"Cannot find field '{self.name}' in schema: {schema}")
+
+ accessor = schema.accessor_for_field(field.field_id)
+
+ if not accessor:
+ raise ValueError(f"Cannot find accessor for field '{self.name}' in schema: {schema}")
+
+ return BoundReference(field=field, accessor=accessor)
+
+
+@dataclass(frozen=True) # type: ignore[misc]
+class BoundPredicate(Bound[T], BooleanExpression):
Review Comment:
I think this would be a `BoundSetPredicate`. Other bound predicates have
only one or zero literals.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]