This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new fb624cccb4 API: Add try catch on `ExpressionUtils.sanitizeString` (#6920)
fb624cccb4 is described below
commit fb624cccb4c7461d84409e4fd834c4350a15e24a
Author: Xinyi Lu <[email protected]>
AuthorDate: Tue Feb 28 23:49:51 2023 -0800
API: Add try catch on `ExpressionUtils.sanitizeString` (#6920)
* add try catch on ExpressionUtils.sanitizeString
* add unit test
* use assertThat to print more error info
---
.../apache/iceberg/expressions/ExpressionUtil.java | 31 +++++++++++++---------
.../iceberg/expressions/TestExpressionUtil.java | 15 +++++++++++
2 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
index bff061968f..aa36fb51b7 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -422,18 +422,25 @@ public class ExpressionUtil {
}
private static String sanitizeString(CharSequence value, long now, int today) {
- if (DATE.matcher(value).matches()) {
- Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
- return sanitizeDate(date.value(), today);
- } else if (TIMESTAMP.matcher(value).matches()) {
- Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withoutZone());
- return sanitizeTimestamp(ts.value(), now);
- } else if (TIMESTAMPTZ.matcher(value).matches()) {
- Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withZone());
- return sanitizeTimestamp(ts.value(), now);
- } else if (TIME.matcher(value).matches()) {
- return "(time)";
- } else {
+ try {
+ if (DATE.matcher(value).matches()) {
+ Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
+ return sanitizeDate(date.value(), today);
+ } else if (TIMESTAMP.matcher(value).matches()) {
+ Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withoutZone());
+ return sanitizeTimestamp(ts.value(), now);
+ } else if (TIMESTAMPTZ.matcher(value).matches()) {
+ Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withZone());
+ return sanitizeTimestamp(ts.value(), now);
+ } else if (TIME.matcher(value).matches()) {
+ return "(time)";
+ } else {
+ return sanitizeSimpleString(value);
+ }
+ } catch (Exception ex) {
+ // Don't throw when parsing failed in sanitizeString
+ // because user could provide an invalid integer/date/timestamp string
+ // and expect them to be treated as a string instead of specific type
return sanitizeSimpleString(value);
}
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
index 56cf0afa46..770a9df13a 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
@@ -24,6 +24,7 @@ import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
@@ -596,6 +597,20 @@ public class TestExpressionUtil {
ExpressionUtil.toSanitizedString(Expressions.equal("test", nextWeek)));
}
+ @Test
+ public void testSanitizeStringFallback() {
+ Pattern filterPattern = Pattern.compile("^test = \\(hash-[0-9a-fA-F]{8}\\)$");
+ for (String filter :
+ Lists.newArrayList(
+ "2022-20-29",
+ "2022-04-29T40:49:51.123456",
+ "2022-04-29T23:70:51-07:00",
+ "2022-04-29T23:49:51.123456+100:00")) {
+ String sanitizedFilter = ExpressionUtil.toSanitizedString(Expressions.equal("test", filter));
+ Assertions.assertThat(filterPattern.matcher(sanitizedFilter)).matches();
+ }
+ }
+
@Test
public void testIdenticalExpressionIsEquivalent() {
Expression[] exprs =