JingsongLi commented on code in PR #7844: URL: https://github.com/apache/paimon/pull/7844#discussion_r3246501531
########## paimon-python/pypaimon/daft/daft_predicate_visitor.py: ########## @@ -0,0 +1,243 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Filter conversion utilities for Paimon pushdowns. + +This module provides utilities to convert Daft expressions to Paimon predicates +for filter pushdown optimization using the Visitor pattern. 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any + +from daft.expressions.visitor import PredicateVisitor + +if TYPE_CHECKING: + from pypaimon.common.predicate import Predicate + from pypaimon.common.predicate_builder import PredicateBuilder + from pypaimon.table.file_store_table import FileStoreTable + + from daft.daft import PyExpr + from daft.expressions import Expression + + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True, slots=True) +class _ColRef: + """Column reference marker to distinguish columns from literal values in the tree fold.""" + + name: str + + +class PaimonPredicateVisitor(PredicateVisitor[Any]): + """Tree fold visitor that converts Daft expressions to Paimon predicates. + + Leaf nodes return their values (_ColRef for columns, raw values for literals). + Predicate nodes return Paimon Predicate objects, or None if unsupported. + + Supported operations: + - Comparison: ==, !=, <, <=, >, >= + - Is null / Is not null + - Is in + - Between (inclusive) + - String: startswith, endswith, contains + - Logical: and, or + """ + + def __init__(self, builder: PredicateBuilder) -> None: + self._builder = builder + + # -- Leaf nodes -- + + def visit_col(self, name: str) -> _ColRef: + return _ColRef(name) + + def visit_lit(self, value: Any) -> Any: + return value + + def visit_alias(self, expr: Expression, alias: str) -> Any: + return self.visit(expr) + + def visit_cast(self, expr: Expression, dtype: Any) -> None: + return None + + def visit_coalesce(self, args: list[Expression]) -> None: + return None + + def visit_function(self, name: str, args: list[Expression]) -> None: + logger.debug("Function '%s' is not supported for Paimon pushdown", name) + + # -- Logical operators -- + + def visit_and(self, left: Expression, right: Expression) -> Predicate | None: + left_pred = self.visit(left) + right_pred = self.visit(right) + if left_pred is not None and 
right_pred is not None: + return self._builder.and_predicates([left_pred, right_pred]) + return None + + def visit_or(self, left: Expression, right: Expression) -> Predicate | None: + left_pred = self.visit(left) + right_pred = self.visit(right) + if left_pred is not None and right_pred is not None: + return self._builder.or_predicates([left_pred, right_pred]) + return None + + def visit_not(self, expr: Expression) -> None: + return None + + # -- Comparison operators -- + + def _cmp(self, left: Expression, right: Expression, fn: Any) -> Predicate | None: + """Fold a binary comparison: extract col ref and literal value, then apply fn.""" + lhs, rhs = self.visit(left), self.visit(right) + if isinstance(lhs, _ColRef) and not isinstance(rhs, _ColRef): + return fn(lhs.name, rhs) + if isinstance(rhs, _ColRef) and not isinstance(lhs, _ColRef): + return fn(rhs.name, lhs) Review Comment: For visit_less_than(left, right), the semantics are left < right. When rhs is the _ColRef (i.e. 3 < col), the code calls less_than(col, 3), which means col < 3, but the actual semantics should be col > 3. For asymmetric operators (<, <=, >, >=), swapping the positions of the column and the literal requires flipping the operator at the same time. == and != happen to be unaffected because they are symmetric. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
