This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new fb624cccb4 API: Add try catch on `ExpressionUtils.sanitizeString` (#6920)
fb624cccb4 is described below

commit fb624cccb4c7461d84409e4fd834c4350a15e24a
Author: Xinyi Lu <[email protected]>
AuthorDate: Tue Feb 28 23:49:51 2023 -0800

    API: Add try catch on `ExpressionUtils.sanitizeString` (#6920)
    
    * add try catch on ExpressionUtils.sanitizeString
    
    * add unit test
    
    * use assertThat to print more error info
---
 .../apache/iceberg/expressions/ExpressionUtil.java | 31 +++++++++++++---------
 .../iceberg/expressions/TestExpressionUtil.java    | 15 +++++++++++
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
index bff061968f..aa36fb51b7 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -422,18 +422,25 @@ public class ExpressionUtil {
   }
 
   private static String sanitizeString(CharSequence value, long now, int today) {
-    if (DATE.matcher(value).matches()) {
-      Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
-      return sanitizeDate(date.value(), today);
-    } else if (TIMESTAMP.matcher(value).matches()) {
-      Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withoutZone());
-      return sanitizeTimestamp(ts.value(), now);
-    } else if (TIMESTAMPTZ.matcher(value).matches()) {
-      Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withZone());
-      return sanitizeTimestamp(ts.value(), now);
-    } else if (TIME.matcher(value).matches()) {
-      return "(time)";
-    } else {
+    try {
+      if (DATE.matcher(value).matches()) {
+        Literal<Integer> date = Literal.of(value).to(Types.DateType.get());
+        return sanitizeDate(date.value(), today);
+      } else if (TIMESTAMP.matcher(value).matches()) {
+        Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withoutZone());
+        return sanitizeTimestamp(ts.value(), now);
+      } else if (TIMESTAMPTZ.matcher(value).matches()) {
+        Literal<Long> ts = Literal.of(value).to(Types.TimestampType.withZone());
+        return sanitizeTimestamp(ts.value(), now);
+      } else if (TIME.matcher(value).matches()) {
+        return "(time)";
+      } else {
+        return sanitizeSimpleString(value);
+      }
+    } catch (Exception ex) {
+      // Don't throw when parsing failed in sanitizeString
+      // because user could provide an invalid integer/date/timestamp string
+      // and expect them to be treated as a string instead of specific type
       return sanitizeSimpleString(value);
     }
   }
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
index 56cf0afa46..770a9df13a 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
@@ -24,6 +24,7 @@ import java.time.ZoneOffset;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Pattern;
 import java.util.stream.IntStream;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
@@ -596,6 +597,20 @@ public class TestExpressionUtil {
         ExpressionUtil.toSanitizedString(Expressions.equal("test", nextWeek)));
   }
 
+  @Test
+  public void testSanitizeStringFallback() {
+    Pattern filterPattern = Pattern.compile("^test = \\(hash-[0-9a-fA-F]{8}\\)$");
+    for (String filter :
+        Lists.newArrayList(
+            "2022-20-29",
+            "2022-04-29T40:49:51.123456",
+            "2022-04-29T23:70:51-07:00",
+            "2022-04-29T23:49:51.123456+100:00")) {
+      String sanitizedFilter = ExpressionUtil.toSanitizedString(Expressions.equal("test", filter));
+      Assertions.assertThat(filterPattern.matcher(sanitizedFilter)).matches();
+    }
+  }
+
   @Test
   public void testIdenticalExpressionIsEquivalent() {
     Expression[] exprs =

Reply via email to