This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new e9c31706 Add support for boolean expressions and quoted columns (#1286)
e9c31706 is described below
commit e9c31706845ffb12429d0f656f25ed09bf9518f9
Author: MoSheikh <[email protected]>
AuthorDate: Wed Nov 6 15:09:00 2024 -0600
Add support for boolean expressions and quoted columns (#1286)
* Add support for boolean expressions and quoted columns
* Add AlwaysTrue & AlwaysFalse support plus tests
* Add test for quoted column
* Remove commented code
---------
Co-authored-by: Mohammad Sheikh <[email protected]>
---
pyiceberg/expressions/parser.py | 49 ++++++++++++++++++++++++++++------------
tests/expressions/test_parser.py | 18 +++++++++++++--
2 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 61aa1647..dcd8dceb 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -21,6 +21,7 @@ from pyparsing import (
CaselessKeyword,
DelimitedList,
Group,
+ MatchFirst,
ParserElement,
ParseResults,
Suppress,
@@ -57,6 +58,7 @@ from pyiceberg.expressions import (
StartsWith,
)
from pyiceberg.expressions.literals import (
+ BooleanLiteral,
DecimalLiteral,
Literal,
LongLiteral,
@@ -77,7 +79,9 @@ NULL = CaselessKeyword("null")
NAN = CaselessKeyword("nan")
LIKE = CaselessKeyword("like")
-identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
+unquoted_identifier = Word(alphas, alphanums + "_$")
+quoted_identifier = Suppress('"') + unquoted_identifier + Suppress('"')
+identifier = MatchFirst([unquoted_identifier, quoted_identifier]).set_results_name("identifier")
column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")
like_regex = r"(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)"
@@ -100,16 +104,18 @@ boolean = one_of(["true", "false"], caseless=True).set_results_name("boolean")
string = sgl_quoted_string.set_results_name("raw_quoted_string")
decimal = common.real().set_results_name("decimal")
integer = common.signed_integer().set_results_name("integer")
-literal = Group(string | decimal | integer).set_results_name("literal")
-literal_set = Group(DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer)).set_results_name("literal_set")
+literal = Group(string | decimal | integer | boolean).set_results_name("literal")
+literal_set = Group(
+ DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean)
+).set_results_name("literal_set")
@boolean.set_parse_action
-def _(result: ParseResults) -> BooleanExpression:
+def _(result: ParseResults) -> Literal[bool]:
if strtobool(result.boolean):
- return AlwaysTrue()
+ return BooleanLiteral(True)
else:
- return AlwaysFalse()
+ return BooleanLiteral(False)
@string.set_parse_action
@@ -265,14 +271,29 @@ def handle_or(result: ParseResults) -> Or:
return Or(*result[0])
-boolean_expression = infix_notation(
- predicate,
- [
- (Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
- (Suppress(AND), 2, opAssoc.LEFT, handle_and),
- (Suppress(OR), 2, opAssoc.LEFT, handle_or),
- ],
-).set_name("expr")
+def handle_always_expression(result: ParseResults) -> BooleanExpression:
+ # If the entire result is "true" or "false", return AlwaysTrue or AlwaysFalse
+ expr = result[0]
+ if isinstance(expr, BooleanLiteral):
+ if expr.value:
+ return AlwaysTrue()
+ else:
+ return AlwaysFalse()
+ return result[0]
+
+
+boolean_expression = (
+ infix_notation(
+ predicate,
+ [
+ (Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
+ (Suppress(AND), 2, opAssoc.LEFT, handle_and),
+ (Suppress(OR), 2, opAssoc.LEFT, handle_or),
+ ],
+ )
+ .set_name("expr")
+ .add_parse_action(handle_always_expression)
+)
def parse(expr: str) -> BooleanExpression:
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 0bccc9b8..6096b10f 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -41,14 +41,28 @@ from pyiceberg.expressions import (
)
-def test_true() -> None:
+def test_always_true() -> None:
assert AlwaysTrue() == parser.parse("true")
-def test_false() -> None:
+def test_always_false() -> None:
assert AlwaysFalse() == parser.parse("false")
+def test_quoted_column() -> None:
+ assert EqualTo("foo", True) == parser.parse('"foo" = TRUE')
+
+
+def test_equals_true() -> None:
+ assert EqualTo("foo", True) == parser.parse("foo = true")
+ assert EqualTo("foo", True) == parser.parse("foo == TRUE")
+
+
+def test_equals_false() -> None:
+ assert EqualTo("foo", False) == parser.parse("foo = false")
+ assert EqualTo("foo", False) == parser.parse("foo == FALSE")
+
+
def test_is_null() -> None:
assert IsNull("foo") == parser.parse("foo is null")
assert IsNull("foo") == parser.parse("foo IS NULL")