This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new e209f188f9 API: Implement bound expression sanitization (#8149)
e209f188f9 is described below
commit e209f188f93acf53c34d43ededc40b9daeb040ba
Author: Ryan Blue <[email protected]>
AuthorDate: Fri Jul 28 10:26:14 2023 -0700
API: Implement bound expression sanitization (#8149)
---
.../apache/iceberg/expressions/ExpressionUtil.java | 172 ++++++++++++++++++++-
.../iceberg/expressions/TestExpressionUtil.java | 150 +++++++++++++++++-
.../main/java/org/apache/iceberg/SnapshotScan.java | 4 +-
3 files changed, 322 insertions(+), 4 deletions(-)
diff --git
a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
index 7eb61cc14e..3708dafc41 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -34,6 +34,7 @@ import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.transforms.Transforms;
+import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
/** Expression utility methods. */
@@ -70,6 +71,29 @@ public class ExpressionUtil {
return ExpressionVisitors.visit(expr, new ExpressionSanitizer());
}
+ /**
+ * Produces an unbound {@link Expression} with the same structure, but with data values replaced
+ * by descriptions.
+ *
+ * <p>Numbers are replaced with magnitude and type, string-like values are replaced by hashes, and
+ * date/time values are replaced by the type.
+ *
+ * @param struct a StructType to bind the expression
+ * @param expr an Expression to sanitize
+ * @param caseSensitive whether to bind case sensitively
+ * @return a sanitized Expression
+ */
+ public static Expression sanitize(
+ Types.StructType struct, Expression expr, boolean caseSensitive) {
+ try {
+ Expression bound = Binder.bind(struct, expr, caseSensitive);
+ return ExpressionVisitors.visit(bound, new ExpressionSanitizer());
+ } catch (RuntimeException e) {
+ // if the expression cannot be bound, sanitize the unbound version
+ return ExpressionVisitors.visit(expr, new ExpressionSanitizer());
+ }
+ }
+
/**
* Produces a sanitized expression string with the same structure, but with data values replaced
* by descriptions.
@@ -84,6 +108,29 @@ public class ExpressionUtil {
return ExpressionVisitors.visit(expr, new StringSanitizer());
}
+ /**
+ * Produces a sanitized expression string with the same structure, but with data values replaced
+ * by descriptions.
+ *
+ * <p>Numbers are replaced with magnitude and type, string-like values are replaced by hashes, and
+ * date/time values are replaced by the type.
+ *
+ * @param struct a StructType to bind the expression
+ * @param expr an Expression to sanitize
+ * @param caseSensitive whether to bind case sensitively
+ * @return a sanitized expression string
+ */
+ public static String toSanitizedString(
+ Types.StructType struct, Expression expr, boolean caseSensitive) {
+ try {
+ Expression bound = Binder.bind(struct, expr, caseSensitive);
+ return ExpressionVisitors.visit(bound, new StringSanitizer());
+ } catch (RuntimeException e) {
+ // if the expression cannot be bound, sanitize the unbound version
+ return ExpressionVisitors.visit(expr, new StringSanitizer());
+ }
+ }
+
/**
* Extracts an expression that references only the given column IDs from the given expression.
*
@@ -176,6 +223,28 @@ public class ExpressionUtil {
}
}
+ public static <T> UnboundTerm<T> unbind(BoundTerm<T> term) {
+ if (term instanceof BoundTransform) {
+ BoundTransform<?, T> bound = (BoundTransform<?, T>) term;
+ return Expressions.transform(bound.ref().name(), bound.transform());
+ } else if (term instanceof BoundReference) {
+ return Expressions.ref(((BoundReference<T>) term).name());
+ }
+
+ throw new UnsupportedOperationException("Cannot unbind unsupported term: "
+ term);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <T> UnboundTerm<T> unbind(Term term) {
+ if (term instanceof UnboundTerm) {
+ return (UnboundTerm<T>) term;
+ } else if (term instanceof BoundTerm) {
+ return unbind((BoundTerm<T>) term);
+ }
+
+ throw new UnsupportedOperationException("Cannot unbind unsupported term: "
+ term);
+ }
+
private static class ExpressionSanitizer
extends ExpressionVisitors.ExpressionVisitor<Expression> {
private final long now;
@@ -214,8 +283,28 @@ public class ExpressionUtil {
}
@Override
+ @SuppressWarnings("unchecked")
public <T> Expression predicate(BoundPredicate<T> pred) {
- throw new UnsupportedOperationException("Cannot sanitize bound
predicate: " + pred);
+ if (pred.isUnaryPredicate()) {
+ // unary predicates don't need to be sanitized
+ return new UnboundPredicate<>(pred.op(), unbind(pred.term()));
+ } else if (pred.isLiteralPredicate()) {
+ BoundLiteralPredicate<T> bound = (BoundLiteralPredicate<T>) pred;
+ return new UnboundPredicate<>(
+ pred.op(),
+ unbind(pred.term()),
+ (T) sanitize(bound.term().type(), bound.literal(), now, today));
+ } else if (pred.isSetPredicate()) {
+ BoundSetPredicate<T> bound = (BoundSetPredicate<T>) pred;
+ Iterable<T> iter =
+ () ->
+ bound.literalSet().stream()
+ .map(lit -> (T) sanitize(bound.term().type(), lit, now,
today))
+ .iterator();
+ return new UnboundPredicate<>(pred.op(), unbind(pred.term()), iter);
+ }
+
+ throw new UnsupportedOperationException("Cannot sanitize bound predicate
type: " + pred.op());
}
@Override
@@ -286,9 +375,60 @@ public class ExpressionUtil {
return "(" + leftResult + " OR " + rightResult + ")";
}
+ private String value(BoundLiteralPredicate<?> pred) {
+ return sanitize(pred.term().type(), pred.literal().value(), nowMicros,
today);
+ }
+
@Override
public <T> String predicate(BoundPredicate<T> pred) {
- throw new UnsupportedOperationException("Cannot sanitize bound
predicate: " + pred);
+ String term = describe(pred.term());
+ switch (pred.op()) {
+ case IS_NULL:
+ return term + " IS NULL";
+ case NOT_NULL:
+ return term + " IS NOT NULL";
+ case IS_NAN:
+ return "is_nan(" + term + ")";
+ case NOT_NAN:
+ return "not_nan(" + term + ")";
+ case LT:
+ return term + " < " + value((BoundLiteralPredicate<?>) pred);
+ case LT_EQ:
+ return term + " <= " + value((BoundLiteralPredicate<?>) pred);
+ case GT:
+ return term + " > " + value((BoundLiteralPredicate<?>) pred);
+ case GT_EQ:
+ return term + " >= " + value((BoundLiteralPredicate<?>) pred);
+ case EQ:
+ return term + " = " + value((BoundLiteralPredicate<?>) pred);
+ case NOT_EQ:
+ return term + " != " + value((BoundLiteralPredicate<?>) pred);
+ case IN:
+ return term
+ + " IN "
+ + abbreviateValues(
+ pred.asSetPredicate().literalSet().stream()
+ .map(lit -> sanitize(pred.term().type(), lit,
nowMicros, today))
+ .collect(Collectors.toList()))
+ .stream()
+ .collect(Collectors.joining(", ", "(", ")"));
+ case NOT_IN:
+ return term
+ + " NOT IN "
+ + abbreviateValues(
+ pred.asSetPredicate().literalSet().stream()
+ .map(lit -> sanitize(pred.term().type(), lit,
nowMicros, today))
+ .collect(Collectors.toList()))
+ .stream()
+ .collect(Collectors.joining(", ", "(", ")"));
+ case STARTS_WITH:
+ return term + " STARTS WITH " + value((BoundLiteralPredicate<?>)
pred);
+ case NOT_STARTS_WITH:
+ return term + " NOT STARTS WITH " + value((BoundLiteralPredicate<?>)
pred);
+ default:
+ throw new UnsupportedOperationException(
+ "Cannot sanitize unsupported predicate type: " + pred.op());
+ }
}
@Override
@@ -361,6 +501,34 @@ public class ExpressionUtil {
return sanitizedValues;
}
+ private static String sanitize(Type type, Object value, long now, int today)
{
+ switch (type.typeId()) {
+ case INTEGER:
+ case LONG:
+ return sanitizeNumber((Number) value, "int");
+ case FLOAT:
+ case DOUBLE:
+ return sanitizeNumber((Number) value, "float");
+ case DATE:
+ return sanitizeDate((int) value, today);
+ case TIME:
+ return "(time)";
+ case TIMESTAMP:
+ return sanitizeTimestamp((long) value, now);
+ case STRING:
+ return sanitizeString((CharSequence) value, now, today);
+ case BOOLEAN:
+ case UUID:
+ case DECIMAL:
+ case FIXED:
+ case BINARY:
+ // for boolean, uuid, decimal, fixed, and binary, match the string
result
+ return sanitizeSimpleString(value.toString());
+ }
+ throw new UnsupportedOperationException(
+ String.format("Cannot sanitize value for unsupported type %s: %s",
type, value));
+ }
+
private static String sanitize(Literal<?> literal, long now, int today) {
if (literal instanceof Literals.StringLiteral) {
return sanitizeString(((Literals.StringLiteral) literal).value(), now,
today);
diff --git
a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
index 2bfa9bc20a..9a27830543 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
@@ -46,10 +46,14 @@ public class TestExpressionUtil {
Types.NestedField.required(5, "date", Types.DateType.get()),
Types.NestedField.required(6, "time", Types.DateType.get()),
Types.NestedField.optional(7, "data", Types.StringType.get()),
- Types.NestedField.optional(8, "measurement",
Types.DoubleType.get()));
+ Types.NestedField.optional(8, "measurement", Types.DoubleType.get()),
+ Types.NestedField.optional(9, "test", Types.IntegerType.get()));
private static final Types.StructType STRUCT = SCHEMA.asStruct();
+ private static final Types.StructType FLOAT_TEST =
+ Types.StructType.of(Types.NestedField.optional(1, "test",
Types.FloatType.get()));
+
@Test
public void testUnchangedUnaryPredicates() {
for (Expression unary :
@@ -59,6 +63,7 @@ public class TestExpressionUtil {
Expressions.isNaN("test"),
Expressions.notNaN("test"))) {
assertEquals(unary, ExpressionUtil.sanitize(unary));
+ assertEquals(unary, ExpressionUtil.sanitize(FLOAT_TEST, unary, true));
}
}
@@ -68,9 +73,17 @@ public class TestExpressionUtil {
Expressions.in("test", "(2-digit-int)", "(3-digit-int)"),
ExpressionUtil.sanitize(Expressions.in("test", 34, 345)));
+ assertEquals(
+ Expressions.in("test", "(2-digit-int)", "(3-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.in("test", 34, 345),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.in("test", 34,
345)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test IN ((2-digit-int), (3-digit-int))");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT, Expressions.in("test",
34, 345), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test IN ((2-digit-int), (3-digit-int))");
}
@Test
@@ -126,6 +139,10 @@ public class TestExpressionUtil {
assertThat(ExpressionUtil.toSanitizedString(Expressions.notIn("test", 34,
345)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test NOT IN ((2-digit-int), (3-digit-int))");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.notIn("test", 34, 345), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test NOT IN ((2-digit-int), (3-digit-int))");
}
@Test
@@ -161,9 +178,17 @@ public class TestExpressionUtil {
Expressions.lessThan("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.lessThan("test", 34)));
+ assertEquals(
+ Expressions.lessThan("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.lessThan("test", 34),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.lessThan("test",
34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test < (2-digit-int)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.lessThan("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test < (2-digit-int)");
}
@Test
@@ -172,9 +197,18 @@ public class TestExpressionUtil {
Expressions.lessThanOrEqual("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.lessThanOrEqual("test", 34)));
+ assertEquals(
+ Expressions.lessThanOrEqual("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.lessThanOrEqual("test",
34), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.lessThanOrEqual("test",
34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test <= (2-digit-int)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(STRUCT,
Expressions.lessThanOrEqual("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test <= (2-digit-int)");
}
@Test
@@ -183,9 +217,17 @@ public class TestExpressionUtil {
Expressions.greaterThan("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.greaterThan("test", 34)));
+ assertEquals(
+ Expressions.greaterThan("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.greaterThan("test", 34),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.greaterThan("test",
34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test > (2-digit-int)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.greaterThan("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test > (2-digit-int)");
}
@Test
@@ -194,9 +236,19 @@ public class TestExpressionUtil {
Expressions.greaterThanOrEqual("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.greaterThanOrEqual("test", 34)));
+ assertEquals(
+ Expressions.greaterThanOrEqual("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.greaterThanOrEqual("test",
34), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.greaterThanOrEqual("test",
34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test >= (2-digit-int)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(
+ STRUCT, Expressions.greaterThanOrEqual("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test >= (2-digit-int)");
}
@Test
@@ -205,9 +257,17 @@ public class TestExpressionUtil {
Expressions.equal("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.equal("test", 34)));
+ assertEquals(
+ Expressions.equal("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("test", 34), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", 34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (2-digit-int)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.equal("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test = (2-digit-int)");
}
@Test
@@ -216,9 +276,17 @@ public class TestExpressionUtil {
Expressions.notEqual("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.notEqual("test", 34)));
+ assertEquals(
+ Expressions.notEqual("test", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.notEqual("test", 34),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.notEqual("test",
34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test != (2-digit-int)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.notEqual("test", 34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test != (2-digit-int)");
}
@Test
@@ -227,9 +295,18 @@ public class TestExpressionUtil {
Expressions.startsWith("test", "(hash-34d05fb7)"),
ExpressionUtil.sanitize(Expressions.startsWith("test", "aaa")));
+ assertEquals(
+ Expressions.startsWith("data", "(hash-34d05fb7)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.startsWith("data", "aaa"),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.startsWith("test",
"aaa")))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test STARTS WITH (hash-34d05fb7)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(STRUCT,
Expressions.startsWith("data", "aaa"), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("data STARTS WITH (hash-34d05fb7)");
}
@Test
@@ -238,9 +315,19 @@ public class TestExpressionUtil {
Expressions.notStartsWith("test", "(hash-34d05fb7)"),
ExpressionUtil.sanitize(Expressions.notStartsWith("test", "aaa")));
+ assertEquals(
+ Expressions.notStartsWith("data", "(hash-34d05fb7)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.notStartsWith("data",
"aaa"), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.notStartsWith("test",
"aaa")))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test NOT STARTS WITH (hash-34d05fb7)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(
+ STRUCT, Expressions.notStartsWith("data", "aaa"), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("data NOT STARTS WITH (hash-34d05fb7)");
}
@Test
@@ -249,11 +336,22 @@ public class TestExpressionUtil {
Expressions.equal(Expressions.truncate("test", 2), "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.equal(Expressions.truncate("test",
2), 34)));
+ assertEquals(
+ Expressions.equal(Expressions.truncate("test", 2), "(2-digit-int)"),
+ ExpressionUtil.sanitize(
+ STRUCT, Expressions.equal(Expressions.truncate("test", 2), 34),
true));
+
assertThat(
ExpressionUtil.toSanitizedString(
Expressions.equal(Expressions.truncate("test", 2), 34)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("truncate[2](test) = (2-digit-int)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(
+ STRUCT, Expressions.equal(Expressions.truncate("test", 2),
34), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("truncate[2](test) = (2-digit-int)");
}
@Test
@@ -262,9 +360,17 @@ public class TestExpressionUtil {
Expressions.equal("test", "(2-digit-int)"),
ExpressionUtil.sanitize(Expressions.equal("test", 34L)));
+ assertEquals(
+ Expressions.equal("id", "(2-digit-int)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("id", 34L), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
34L)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (2-digit-int)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.equal("id", 34L), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("id = (2-digit-int)");
}
@Test
@@ -273,9 +379,17 @@ public class TestExpressionUtil {
Expressions.equal("test", "(2-digit-float)"),
ExpressionUtil.sanitize(Expressions.equal("test", 34.12F)));
+ assertEquals(
+ Expressions.equal("test", "(2-digit-float)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("test", 34.12F),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
34.12F)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (2-digit-float)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.equal("test", 34.12F), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("test = (2-digit-float)");
}
@Test
@@ -284,9 +398,17 @@ public class TestExpressionUtil {
Expressions.equal("test", "(2-digit-float)"),
ExpressionUtil.sanitize(Expressions.equal("test", 34.12D)));
+ assertEquals(
+ Expressions.equal("measurement", "(2-digit-float)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("measurement",
34.12D), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
34.12D)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (2-digit-float)");
+
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("measurement",
34.12D)))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("measurement = (2-digit-float)");
}
@Test
@@ -295,9 +417,18 @@ public class TestExpressionUtil {
Expressions.equal("test", "(date)"),
ExpressionUtil.sanitize(Expressions.equal("test", "2022-04-29")));
+ assertEquals(
+ Expressions.equal("date", "(date)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("date",
"2022-04-29"), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
"2022-04-29")))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (date)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(STRUCT, Expressions.equal("date",
"2022-04-29"), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("date = (date)");
}
@Test
@@ -309,9 +440,18 @@ public class TestExpressionUtil {
Expressions.equal("test", "(time)"),
ExpressionUtil.sanitize(Expressions.equal("test", currentTime)));
+ assertEquals(
+ Expressions.equal("time", "(time)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("time",
currentTime), true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
currentTime)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (time)");
+
+ assertThat(
+ ExpressionUtil.toSanitizedString(STRUCT, Expressions.equal("time",
currentTime), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("time = (time)");
}
@Test
@@ -326,9 +466,17 @@ public class TestExpressionUtil {
Expressions.equal("test", "(timestamp)"),
ExpressionUtil.sanitize(Expressions.equal("test", timestamp)));
+ assertEquals(
+ Expressions.equal("ts", "(timestamp)"),
+ ExpressionUtil.sanitize(STRUCT, Expressions.equal("ts", timestamp),
true));
+
assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test",
timestamp)))
.as("Sanitized string should be identical except for descriptive
literal")
.isEqualTo("test = (timestamp)");
+
+ assertThat(ExpressionUtil.toSanitizedString(STRUCT,
Expressions.equal("ts", timestamp), true))
+ .as("Sanitized string should be identical except for descriptive
literal")
+ .isEqualTo("ts = (timestamp)");
}
}
diff --git a/core/src/main/java/org/apache/iceberg/SnapshotScan.java
b/core/src/main/java/org/apache/iceberg/SnapshotScan.java
index b6520c2ff4..de53444ba9 100644
--- a/core/src/main/java/org/apache/iceberg/SnapshotScan.java
+++ b/core/src/main/java/org/apache/iceberg/SnapshotScan.java
@@ -140,7 +140,9 @@ public abstract class SnapshotScan<ThisT, T extends
ScanTask, G extends ScanTask
.projectedFieldNames(projectedFieldNames)
.tableName(table().name())
.snapshotId(snapshot.snapshotId())
- .filter(ExpressionUtil.sanitize(filter()))
+ .filter(
+ ExpressionUtil.sanitize(
+ schema().asStruct(), filter(),
context().caseSensitive()))
.scanMetrics(ScanMetricsResult.fromScanMetrics(scanMetrics()))
.metadata(metadata)
.build();