(iceberg-python) branch main updated: Add support for boolean expressions and quoted columns (#1286)

fokko Wed, 06 Nov 2024 13:10:21 -0800

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git



The following commit(s) were added to refs/heads/main by this push:
     new e9c31706 Add support for boolean expressions and quoted columns (#1286)
e9c31706 is described below

commit e9c31706845ffb12429d0f656f25ed09bf9518f9
Author: MoSheikh <[email protected]>
AuthorDate: Wed Nov 6 15:09:00 2024 -0600

    Add support for boolean expressions and quoted columns (#1286)
    
    * Add support for boolean expressions and quoted columns
    
    * Add AlwaysTrue & AlwaysFalse support plus tests
    
    * Add test for quoted column
    
    * Remove commented code
    
    ---------
    
    Co-authored-by: Mohammad Sheikh <[email protected]>
---
 pyiceberg/expressions/parser.py  | 49 ++++++++++++++++++++++++++++------------
 tests/expressions/test_parser.py | 18 +++++++++++++--
 2 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 61aa1647..dcd8dceb 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -21,6 +21,7 @@ from pyparsing import (
     CaselessKeyword,
     DelimitedList,
     Group,
+    MatchFirst,
     ParserElement,
     ParseResults,
     Suppress,
@@ -57,6 +58,7 @@ from pyiceberg.expressions import (
     StartsWith,
 )
 from pyiceberg.expressions.literals import (
+    BooleanLiteral,
     DecimalLiteral,
     Literal,
     LongLiteral,
@@ -77,7 +79,9 @@ NULL = CaselessKeyword("null")
 NAN = CaselessKeyword("nan")
 LIKE = CaselessKeyword("like")
 
-identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
+unquoted_identifier = Word(alphas, alphanums + "_$")
+quoted_identifier = Suppress('"') + unquoted_identifier + Suppress('"')
+identifier = MatchFirst([unquoted_identifier, quoted_identifier]).set_results_name("identifier")
 column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")
 
 like_regex = r"(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)"
@@ -100,16 +104,18 @@ boolean = one_of(["true", "false"], caseless=True).set_results_name("boolean")
 string = sgl_quoted_string.set_results_name("raw_quoted_string")
 decimal = common.real().set_results_name("decimal")
 integer = common.signed_integer().set_results_name("integer")
-literal = Group(string | decimal | integer).set_results_name("literal")
-literal_set = Group(DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer)).set_results_name("literal_set")
+literal = Group(string | decimal | integer | boolean).set_results_name("literal")
+literal_set = Group(
+    DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | DelimitedList(boolean)
+).set_results_name("literal_set")
 
 
 @boolean.set_parse_action
-def _(result: ParseResults) -> BooleanExpression:
+def _(result: ParseResults) -> Literal[bool]:
     if strtobool(result.boolean):
-        return AlwaysTrue()
+        return BooleanLiteral(True)
     else:
-        return AlwaysFalse()
+        return BooleanLiteral(False)
 
 
 @string.set_parse_action
@@ -265,14 +271,29 @@ def handle_or(result: ParseResults) -> Or:
     return Or(*result[0])
 
 
-boolean_expression = infix_notation(
-    predicate,
-    [
-        (Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
-        (Suppress(AND), 2, opAssoc.LEFT, handle_and),
-        (Suppress(OR), 2, opAssoc.LEFT, handle_or),
-    ],
-).set_name("expr")
+def handle_always_expression(result: ParseResults) -> BooleanExpression:
+    # If the entire result is "true" or "false", return AlwaysTrue or AlwaysFalse
+    expr = result[0]
+    if isinstance(expr, BooleanLiteral):
+        if expr.value:
+            return AlwaysTrue()
+        else:
+            return AlwaysFalse()
+    return result[0]
+
+
+boolean_expression = (
+    infix_notation(
+        predicate,
+        [
+            (Suppress(NOT), 1, opAssoc.RIGHT, handle_not),
+            (Suppress(AND), 2, opAssoc.LEFT, handle_and),
+            (Suppress(OR), 2, opAssoc.LEFT, handle_or),
+        ],
+    )
+    .set_name("expr")
+    .add_parse_action(handle_always_expression)
+)
 
 
 def parse(expr: str) -> BooleanExpression:
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 0bccc9b8..6096b10f 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -41,14 +41,28 @@ from pyiceberg.expressions import (
 )
 
 
-def test_true() -> None:
+def test_always_true() -> None:
     assert AlwaysTrue() == parser.parse("true")
 
 
-def test_false() -> None:
+def test_always_false() -> None:
     assert AlwaysFalse() == parser.parse("false")
 
 
+def test_quoted_column() -> None:
+    assert EqualTo("foo", True) == parser.parse('"foo" = TRUE')
+
+
+def test_equals_true() -> None:
+    assert EqualTo("foo", True) == parser.parse("foo = true")
+    assert EqualTo("foo", True) == parser.parse("foo == TRUE")
+
+
+def test_equals_false() -> None:
+    assert EqualTo("foo", False) == parser.parse("foo = false")
+    assert EqualTo("foo", False) == parser.parse("foo == FALSE")
+
+
 def test_is_null() -> None:
     assert IsNull("foo") == parser.parse("foo is null")
     assert IsNull("foo") == parser.parse("foo IS NULL")

(iceberg-python) branch main updated: Add support for boolean expressions and quoted columns (#1286)

Reply via email to