This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new bc2aa007 Support arbitrary literal in BETWEEN operator (#2567)
bc2aa007 is described below
commit bc2aa00721652658b9b042b7a7d837bb7b547261
Author: jtuglu1 <[email protected]>
AuthorDate: Sun Oct 5 12:29:03 2025 -0700
Support arbitrary literal in BETWEEN operator (#2567)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
Want to support calling BETWEEN for any valid column types, not just
numeric columns. This extends support for filter expressions like
`date_col BETWEEN '2025-01-01' AND '2025-01-02'`.
The `test_invalid_between` test was removed in favor of letting the type
checks happen at evaluation time (when the literals are attempted to be
cast to the corresponding column's type for comparison).
## Are these changes tested?
Yes, tested locally applying filters to tables.
## Are there any user-facing changes?
Yes, `BETWEEN` operator signature has changed, it nows supports all
comparable column types.
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
mkdocs/docs/row-filter-syntax.md | 4 +++-
pyiceberg/expressions/parser.py | 3 +--
tests/expressions/test_parser.py | 26 ++++++++++++++------------
3 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/mkdocs/docs/row-filter-syntax.md b/mkdocs/docs/row-filter-syntax.md
index ce3b46c0..bffb97c2 100644
--- a/mkdocs/docs/row-filter-syntax.md
+++ b/mkdocs/docs/row-filter-syntax.md
@@ -102,11 +102,13 @@ column NOT LIKE 'prefix%'
## BETWEEN
-The BETWEEN operator filters a numeric value against an inclusive range, e.g.
`a between 1 and 2` is equivalent to `a >= 1 and a <= 2`.
+The BETWEEN operator filters a column against an inclusive range of two
comparable literals, e.g. `a between 1 and 2` is equivalent to `a >= 1 and a <=
2`.
```sql
column BETWEEN 1 AND 2
column BETWEEN 1.0 AND 2.0
+column BETWEEN '2025-01-01' AND '2025-01-02'
+column BETWEEN '2025-01-01T00:00:00.000000' AND '2025-01-02T12:00:00.000000'
```
## Logical Operations
diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 19663638..60846791 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -107,7 +107,6 @@ boolean = one_of(["true", "false"],
caseless=True).set_results_name("boolean")
string = sgl_quoted_string.set_results_name("raw_quoted_string")
decimal = common.real().set_results_name("decimal")
integer = common.signed_integer().set_results_name("integer")
-number = common.number().set_results_name("number")
literal = Group(string | decimal | integer |
boolean).set_results_name("literal")
literal_set = Group(
DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) |
DelimitedList(boolean)
@@ -151,7 +150,7 @@ comparison_op = one_of(["<", "<=", ">", ">=", "=", "==",
"!=", "<>"], caseless=T
left_ref = column + comparison_op + literal
right_ref = literal + comparison_op + column
comparison = left_ref | right_ref
-between = column + BETWEEN + number + AND + number
+between = column + BETWEEN + literal + AND + literal
@between.set_parse_action
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 152ac03e..28d7cf11 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -42,7 +42,7 @@ from pyiceberg.expressions import (
Reference,
StartsWith,
)
-from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral
+from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral, literal
def test_always_true() -> None:
@@ -241,7 +241,8 @@ def test_quoted_column_with_spaces() -> None:
assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'")
-def test_valid_between() -> None:
+def test_valid_between_with_numerics() -> None:
+ # numerics
assert And(
left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)),
@@ -254,16 +255,17 @@ def test_valid_between() -> None:
left=GreaterThanOrEqual(Reference(name="foo"),
DecimalLiteral(Decimal(1.0))),
right=LessThanOrEqual(Reference(name="foo"),
DecimalLiteral(Decimal(4.0))),
) == parser.parse("foo between 1.0 and 4.0")
- assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and
foo <= 3")
+ # dates
+ assert And(
+ left=GreaterThanOrEqual(Reference(name="foo"), literal("2025-05-10")),
+ right=LessThanOrEqual(Reference(name="foo"), literal("2025-05-12")),
+ ) == parser.parse("foo between '2025-05-10' and '2025-05-12'")
-def test_invalid_between() -> None:
- # boolean
- with pytest.raises(ParseException) as exc_info:
- parser.parse("foo between true and false")
- assert "Expected number, found 'true'" in str(exc_info)
+ # timestamps
+ assert And(
+ left=GreaterThanOrEqual(Reference(name="foo"),
literal("2025-01-01T00:00:00.000000")),
+ right=LessThanOrEqual(Reference(name="foo"),
literal("2025-01-10T12:00:00.000000")),
+ ) == parser.parse("foo between '2025-01-01T00:00:00.000000' and
'2025-01-10T12:00:00.000000'")
- # string
- with pytest.raises(ParseException) as exc_info:
- parser.parse("foo between 'a' and 'b'")
- assert 'Expected number, found "\'"' in str(exc_info)
+ assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and
foo <= 3")