pvary commented on code in PR #14500:
URL: https://github.com/apache/iceberg/pull/14500#discussion_r2704468257
##########
api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java:
##########
@@ -744,4 +745,200 @@ private static PartitionSpec identitySpec(Schema schema, int... ids) {
return specBuilder.build();
}
+
+ /**
+ * Checks if the given bound expression contains any UUID predicates that compare against min/max
+ * bounds. These predicates may produce incorrect results when evaluating against statistics
+ * written with a different UUID comparator.
+ *
+ * @param expr a bound expression
+ * @return true if the expression contains UUID predicates that compare against bounds
+ */
+ public static boolean hasUUIDBoundsPredicate(Expression expr) {
+ return ExpressionVisitors.visit(expr, new UUIDBoundsPredicateDetector());
+ }
+
+ /**
+ * Transforms an unbound expression to use the signed UUID comparator in all UUID literals. This
+ * is used for backward compatibility with files written before RFC 4122/9562 compliant comparison
+ * was implemented.
+ *
+ * @param expr an unbound expression
+ * @return a new expression with UUID literals using the signed comparator
+ */
+ public static Expression withSignedUUIDComparator(Expression expr) {
+ return ExpressionVisitors.visit(expr, new SignedUUIDLiteralTransformer());
+ }
+
+ /**
+ * Visitor that detects if an expression contains UUID predicates that compare against bounds.
+ * These include: lt, ltEq, gt, gtEq, eq, notEq, in, notIn on UUID columns.
+ */
+ private static class UUIDBoundsPredicateDetector
+ extends ExpressionVisitors.ExpressionVisitor<Boolean> {
+
+ @Override
+ public Boolean alwaysTrue() {
+ return false; // a constant-true node contributes no UUID predicate to the result
+ }
+
+ @Override
+ public Boolean alwaysFalse() {
+ return false; // a constant-false node contributes no UUID predicate to the result
+ }
+
+ @Override
+ public Boolean not(Boolean result) {
+ return result; // negation neither adds nor removes a predicate; pass the child's answer up
+ }
+
+ @Override
+ public Boolean and(Boolean leftResult, Boolean rightResult) {
+ return leftResult || rightResult; // detection is "contains any", so OR is intended even for AND
+ }
+
+ @Override
+ public Boolean or(Boolean leftResult, Boolean rightResult) {
+ return leftResult || rightResult; // a match in either operand means the expression has one
+ }
+
+ @Override
+ public <T> Boolean predicate(BoundPredicate<T> pred) {
+ if (pred.term() instanceof BoundReference) {
+ BoundReference<?> ref = (BoundReference<?>) pred.term();
+ if (ref.type().typeId() == Type.TypeID.UUID) {
+ switch (pred.op()) {
+ case LT:
+ case LT_EQ:
+ case GT:
+ case GT_EQ:
+ case EQ:
+ case NOT_EQ:
+ case IN:
+ case NOT_IN:
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+ return false;
Review Comment:
nit: newline
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]