rymurr commented on a change in pull request #2399:
URL: https://github.com/apache/iceberg/pull/2399#discussion_r605580800
##########
File path: python/iceberg/api/expressions/predicate.py
##########
@@ -15,23 +15,39 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
+from typing import List, TYPE_CHECKING, Union
+
from iceberg.exceptions import ValidationException
from .expression import (Expression,
- FALSE,
- Operation,
- TRUE)
+ Operation)
from .literals import (Literal,
Literals)
-from .reference import BoundReference
+from .term import BoundTerm, Term
+from ..types import TypeID
+
+if TYPE_CHECKING:
+ from iceberg.api import StructLike
class Predicate(Expression):
- def __init__(self, op, ref, lit):
- self.op = op
- self.ref = ref
- self.lit = lit
+ def __init__(self, op, term):
Review comment:
nit: can you add types to the `op` and `term` here?
##########
File path: python/iceberg/api/expressions/predicate.py
##########
@@ -70,72 +86,152 @@ def __str__(self):
class BoundPredicate(Predicate):
- def __init__(self, op, ref, lit=None):
- super(BoundPredicate, self).__init__(op, ref, lit)
+ def __init__(self, op: Operation, term: BoundTerm, lit: Literal = None,
literals: List[Literal] = None,
+ is_unary_predicate: bool = False, is_literal_predicate: bool
= False,
+ is_set_predicate: bool = False):
+ super(BoundPredicate, self).__init__(op, term)
+ ValidationException.check(sum([is_unary_predicate,
is_literal_predicate, is_set_predicate]) == 1,
+ "Only a single predicate type may be set:
%s=%s, %s=%s, %s=%s",
+ ("is_unary_predicate", is_unary_predicate,
+ "is_literal_predicate",
is_literal_predicate,
+ "is_set_predicate", is_set_predicate))
+
+ self._literals: Union[None, List[Literal]] = None
+ if is_unary_predicate:
+ ValidationException.check(lit is None, "Unary Predicates may not
have a literal", ())
+
+ elif is_literal_predicate:
+ ValidationException.check(lit is not None, "Literal Predicates
must have a literal set", ())
+ self._literals = [lit] # type: ignore
+
+ elif is_set_predicate:
+ ValidationException.check(literals is not None, "Set Predicates
must have literals set", ())
+ self._literals = literals
+ else:
+ raise ValueError(f"Unable to instantiate {op} -> (lit={lit},
literal={literals}")
- def negate(self):
- return BoundPredicate(self.op.negate(), self.ref, self.lit)
+ @property
+ def lit(self) -> Union[None, Literal]:
Review comment:
nit: `Optional` may be cleaner than `Union[None, T]`
##########
File path: python/iceberg/api/expressions/term.py
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from iceberg.api.types import StructType
+
+if TYPE_CHECKING:
+ from iceberg.api import StructLike
+
+
+class Term(object):
+ pass
+
+
+class BoundTerm(Term):
+
+ @property
+ def ref(self):
+ raise NotImplementedError("Base class does not have implementation")
+
+ @property
+ def type(self):
+ raise NotImplementedError("Base class does not have implementation")
+
+ def eval(self, struct: 'StructLike'):
Review comment:
nit: `'StructLike'` here and `StructLike` below
##########
File path: python/iceberg/api/expressions/predicate.py
##########
@@ -70,72 +86,152 @@ def __str__(self):
class BoundPredicate(Predicate):
- def __init__(self, op, ref, lit=None):
- super(BoundPredicate, self).__init__(op, ref, lit)
+ def __init__(self, op: Operation, term: BoundTerm, lit: Literal = None,
literals: List[Literal] = None,
+ is_unary_predicate: bool = False, is_literal_predicate: bool
= False,
+ is_set_predicate: bool = False):
+ super(BoundPredicate, self).__init__(op, term)
+ ValidationException.check(sum([is_unary_predicate,
is_literal_predicate, is_set_predicate]) == 1,
+ "Only a single predicate type may be set:
%s=%s, %s=%s, %s=%s",
+ ("is_unary_predicate", is_unary_predicate,
+ "is_literal_predicate",
is_literal_predicate,
+ "is_set_predicate", is_set_predicate))
+
+ self._literals: Union[None, List[Literal]] = None
+ if is_unary_predicate:
+ ValidationException.check(lit is None, "Unary Predicates may not
have a literal", ())
+
+ elif is_literal_predicate:
+ ValidationException.check(lit is not None, "Literal Predicates
must have a literal set", ())
+ self._literals = [lit] # type: ignore
+
+ elif is_set_predicate:
+ ValidationException.check(literals is not None, "Set Predicates
must have literals set", ())
+ self._literals = literals
+ else:
+ raise ValueError(f"Unable to instantiate {op} -> (lit={lit},
literal={literals}")
- def negate(self):
- return BoundPredicate(self.op.negate(), self.ref, self.lit)
+ @property
+ def lit(self) -> Union[None, Literal]:
+ if self._literals is None or len(self._literals) == 0:
+ return None
+ return self._literals[0]
+
+ def eval(self, struct: StructLike) -> bool:
+ raise NotImplementedError("eval not implemented for BoundPredicate")
+
+ def test(self, struct) -> bool:
Review comment:
nit: `struct: StructLike` and `TODO:`
##########
File path: python/iceberg/api/expressions/term.py
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from iceberg.api.types import StructType
+
+if TYPE_CHECKING:
+ from iceberg.api import StructLike
+
+
+class Term(object):
Review comment:
Is the purpose of this class structure to give type safety or as an
abstract class? I don't love these hierarchies in python and found this blog
really interesting:
https://glyph.twistedmatrix.com/2021/03/interfaces-and-protocols.html
By no means do we need to do this here, just some food for thought.
##########
File path: python/iceberg/api/expressions/predicate.py
##########
@@ -70,72 +86,152 @@ def __str__(self):
class BoundPredicate(Predicate):
- def __init__(self, op, ref, lit=None):
- super(BoundPredicate, self).__init__(op, ref, lit)
+ def __init__(self, op: Operation, term: BoundTerm, lit: Literal = None,
literals: List[Literal] = None,
+ is_unary_predicate: bool = False, is_literal_predicate: bool
= False,
+ is_set_predicate: bool = False):
+ super(BoundPredicate, self).__init__(op, term)
+ ValidationException.check(sum([is_unary_predicate,
is_literal_predicate, is_set_predicate]) == 1,
+ "Only a single predicate type may be set:
%s=%s, %s=%s, %s=%s",
+ ("is_unary_predicate", is_unary_predicate,
+ "is_literal_predicate",
is_literal_predicate,
+ "is_set_predicate", is_set_predicate))
+
+ self._literals: Union[None, List[Literal]] = None
+ if is_unary_predicate:
+ ValidationException.check(lit is None, "Unary Predicates may not
have a literal", ())
+
+ elif is_literal_predicate:
+ ValidationException.check(lit is not None, "Literal Predicates
must have a literal set", ())
+ self._literals = [lit] # type: ignore
+
+ elif is_set_predicate:
+ ValidationException.check(literals is not None, "Set Predicates
must have literals set", ())
+ self._literals = literals
+ else:
+ raise ValueError(f"Unable to instantiate {op} -> (lit={lit},
literal={literals}")
- def negate(self):
- return BoundPredicate(self.op.negate(), self.ref, self.lit)
+ @property
+ def lit(self) -> Union[None, Literal]:
+ if self._literals is None or len(self._literals) == 0:
+ return None
+ return self._literals[0]
+
+ def eval(self, struct: StructLike) -> bool:
+ raise NotImplementedError("eval not implemented for BoundPredicate")
+
+ def test(self, struct) -> bool:
+ raise NotImplementedError("TO:DO implement test")
class UnboundPredicate(Predicate):
- def __init__(self, op, named_ref, value=None, lit=None):
- if value is None and lit is None:
- super(UnboundPredicate, self).__init__(op, named_ref, None)
+ def __init__(self, op, term, value=None, lit=None, values=None,
literals=None):
+ self._literals = None
+ num_set_args = sum([1 for x in [value, lit, values, literals] if x is
not None])
+
+ if num_set_args > 1:
+ raise ValueError(f"Only one of value={value}, lit={lit},
values={values}, literals={literals} may be set")
+ super(UnboundPredicate, self).__init__(op, term)
if isinstance(value, Literal):
lit = value
value = None
if value is not None:
- super(UnboundPredicate, self).__init__(op, named_ref,
Literals.from_(value))
+ self._literals = [Literals.from_(value)]
elif lit is not None:
- super(UnboundPredicate, self).__init__(op, named_ref, lit)
+ self._literals = [lit]
+ elif values is not None:
+ self._literals = map(Literals.from_, values)
+ elif literals is not None:
+ self._literals = literals
+
+ @property
+ def literals(self):
+ return self._literals
+
+ @property
+ def lit(self):
+ if self.op in [Operation.IN, Operation.NOT_IN]:
+ raise ValueError(f"{self.op} predicate cannot return a literal")
+
+ return None if self.literals is None else self.literals[0]
def negate(self):
- return UnboundPredicate(self.op.negate(), self.ref, self.lit)
+ return UnboundPredicate(self.op.negate(), self.term,
literals=self.literals)
- def bind(self, struct, case_sensitive=True): # noqa: C901
- if case_sensitive:
- field = struct.field(self.ref.name)
- else:
- field = struct.case_insensitive_field(self.ref.name.lower())
-
- ValidationException.check(field is not None,
- "Cannot find field '%s' in struct %s",
(self.ref.name, struct))
-
- if self.lit is None:
- if self.op == Operation.IS_NULL:
- if field.is_required:
- return FALSE
- return BoundPredicate(Operation.IS_NULL,
BoundReference(struct, field.field_id))
- elif self.op == Operation.NOT_NULL:
- if field.is_required:
- return TRUE
- return BoundPredicate(Operation.NOT_NULL,
BoundReference(struct, field.field_id))
+ def bind(self, struct, case_sensitive=True):
+ bound = self.term.bind(struct, case_sensitive=case_sensitive)
+
+ if self.literals is None:
+ return self.bind_unary_operation(bound)
+ elif self.op in [Operation.IN, Operation.NOT_IN]:
+ return self.bind_in_operation(bound)
+
+ return self.bind_literal_operation(bound)
+
+ def bind_unary_operation(self, bound_term: BoundTerm) -> BoundPredicate:
+ from .expressions import Expressions
Review comment:
just curious why this import is at the method only?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]