This is an automated email from the ASF dual-hosted git repository.

hui pushed a commit to branch lmh/refactorFilter
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 789f9a6403dae90f82be930af4252c8e92902f20
Author: Minghui Liu <[email protected]>
AuthorDate: Sun Nov 19 22:42:06 2023 +0800

    replace like with regex
---
 .../plan/expression/unary/LikeExpression.java      |  63 +----------
 .../plan/expression/unary/RegularExpression.java   |   6 +-
 .../ConvertExpressionToFilterVisitor.java          |  13 ++-
 .../filter/basic/ColumnPatternMatchFilter.java     |  13 +--
 .../tsfile/read/filter/factory/ValueFilter.java    | 121 +++++++++++++++++----
 .../read/filter/operator/ValueFilterOperators.java | 108 ++----------------
 6 files changed, 127 insertions(+), 197 deletions(-)

diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java
index 2c08185f5c6..c7f11f1ec46 100644
--- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java
+++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java
@@ -29,6 +29,9 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.regex.Pattern;
 
+import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.compileRegex;
+import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.parseLikePatternToRegex;
+
 public class LikeExpression extends UnaryExpression {
 
   private final String patternString;
@@ -40,7 +43,7 @@ public class LikeExpression extends UnaryExpression {
     super(expression);
     this.patternString = patternString;
     this.isNot = isNot;
-    pattern = compile();
+    pattern = compileRegex(parseLikePatternToRegex(patternString));
   }
 
   public LikeExpression(
@@ -55,7 +58,7 @@ public class LikeExpression extends UnaryExpression {
     super(Expression.deserialize(byteBuffer));
     patternString = ReadWriteIOUtils.readString(byteBuffer);
     isNot = ReadWriteIOUtils.readBool(byteBuffer);
-    pattern = compile();
+    pattern = compileRegex(parseLikePatternToRegex(patternString));
   }
 
   public String getPatternString() {
@@ -70,62 +73,6 @@ public class LikeExpression extends UnaryExpression {
     return isNot;
   }
 
-  /**
-   * The main idea of this part comes from
-   * https://codereview.stackexchange.com/questions/36861/convert-sql-like-to-regex/36864
-   */
-  private Pattern compile() {
-    String unescapeValue = unescapeString(patternString);
-    String specialRegexString = ".^$*+?{}[]|()";
-    StringBuilder patternBuilder = new StringBuilder();
-    patternBuilder.append("^");
-    for (int i = 0; i < unescapeValue.length(); i++) {
-      String ch = String.valueOf(unescapeValue.charAt(i));
-      if (specialRegexString.contains(ch)) {
-        ch = "\\" + unescapeValue.charAt(i);
-      }
-      if (i == 0
-          || !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1)))
-          || i >= 2
-              && "\\\\"
-                  .equals(
-                      patternBuilder.substring(
-                          patternBuilder.length() - 2, patternBuilder.length()))) {
-        patternBuilder.append(ch.replace("%", ".*?").replace("_", "."));
-      } else {
-        patternBuilder.append(ch);
-      }
-    }
-    patternBuilder.append("$");
-    return Pattern.compile(patternBuilder.toString());
-  }
-
-  /**
-   * This Method is for un-escaping strings except '\' before special string '%', '_', '\', because
-   * we need to use '\' to judge whether to replace this to regexp string
-   */
-  private String unescapeString(String value) {
-    StringBuilder stringBuilder = new StringBuilder();
-    int curIndex = 0;
-    for (; curIndex < value.length(); curIndex++) {
-      String ch = String.valueOf(value.charAt(curIndex));
-      if ("\\".equals(ch)) {
-        if (curIndex < value.length() - 1) {
-          String nextChar = String.valueOf(value.charAt(curIndex + 1));
-          if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) {
-            stringBuilder.append(ch);
-          }
-          if ("\\".equals(nextChar)) {
-            curIndex++;
-          }
-        }
-      } else {
-        stringBuilder.append(ch);
-      }
-    }
-    return stringBuilder.toString();
-  }
-
   @Override
   protected String getExpressionStringInternal() {
     return expression.getExpressionString() + (isNot ? " NOT" : "") + " LIKE '" + pattern + "'";
diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java
index 449c2bc21b7..828b4b64aa4 100644
--- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java
+++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java
@@ -31,6 +31,8 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.regex.Pattern;
 
+import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.compileRegex;
+
 public class RegularExpression extends UnaryExpression {
 
   private final String patternString;
@@ -42,7 +44,7 @@ public class RegularExpression extends UnaryExpression {
     super(expression);
     this.patternString = patternString;
     this.isNot = isNot;
-    pattern = Pattern.compile(patternString);
+    pattern = compileRegex(patternString);
   }
 
   public RegularExpression(
@@ -57,7 +59,7 @@ public class RegularExpression extends UnaryExpression {
     super(Expression.deserialize(byteBuffer));
     patternString = ReadWriteIOUtils.readString(byteBuffer);
     isNot = ReadWriteIOUtils.readBool(byteBuffer);
-    pattern = Pattern.compile(Validate.notNull(patternString));
+    pattern = compileRegex(Validate.notNull(patternString, "patternString cannot be null"));
   }
 
   public String getPatternString() {
diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java
index f174e19427d..d6b2b7329ad 100644
--- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java
+++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java
@@ -47,6 +47,7 @@ import org.apache.iotdb.tsfile.read.filter.factory.ValueFilter;
 
 import java.util.LinkedHashSet;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 import static org.apache.iotdb.tsfile.read.filter.operator.Not.CONTAIN_NOT_ERR_MSG;
 
@@ -159,11 +160,11 @@ public class ConvertExpressionToFilterVisitor extends PredicateVisitor<Filter, T
     }
 
     String measurement = ((TimeSeriesOperand) expression).getPath().getMeasurement();
-    String likePattern = likeExpression.getPatternString();
+    Pattern pattern = likeExpression.getPattern();
     boolean isNot = likeExpression.isNot();
     return isNot
-        ? ValueFilter.notLike(measurement, likePattern)
-        : ValueFilter.like(measurement, likePattern);
+        ? ValueFilter.notRegexp(measurement, pattern)
+        : ValueFilter.regexp(measurement, pattern);
   }
 
   @Override
@@ -174,11 +175,11 @@ public class ConvertExpressionToFilterVisitor extends PredicateVisitor<Filter, T
     }
 
     String measurement = ((TimeSeriesOperand) expression).getPath().getMeasurement();
-    String regex = regularExpression.getPatternString();
+    Pattern pattern = regularExpression.getPattern();
     boolean isNot = regularExpression.isNot();
     return isNot
-        ? ValueFilter.notRegexp(measurement, regex)
-        : ValueFilter.regexp(measurement, regex);
+        ? ValueFilter.notRegexp(measurement, pattern)
+        : ValueFilter.regexp(measurement, pattern);
   }
 
   @Override
diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java
index 656403e6788..41eb97ae650 100644
--- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java
+++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java
@@ -21,20 +21,13 @@ package org.apache.iotdb.tsfile.read.filter.basic;
 
 import java.util.Objects;
 import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
 
-/* base class for Like, NotLike, Regex, NotRegex */
+/* base class for Regex, NotRegex */
 public abstract class ColumnPatternMatchFilter implements Filter {
 
-  protected final String regex;
   protected final Pattern pattern;
 
-  protected ColumnPatternMatchFilter(String regex) {
-    this.regex = Objects.requireNonNull(regex, "regex cannot be null");
-    try {
-      this.pattern = Pattern.compile(regex);
-    } catch (PatternSyntaxException e) {
-      throw new PatternSyntaxException("Regular expression error", regex, e.getIndex());
-    }
+  protected ColumnPatternMatchFilter(Pattern pattern) {
+    this.pattern = Objects.requireNonNull(pattern, "pattern cannot be null");
   }
 }
diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java
index bb0c36d77af..c68e226b19d 100644
--- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java
+++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java
@@ -24,17 +24,17 @@ import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueEq
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueGt;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueGtEq;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueIn;
-import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLike;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLt;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLtEq;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotBetweenAnd;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotEq;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotIn;
-import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotLike;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotRegexp;
 import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueRegexp;
 
 import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
 
 public class ValueFilter {
 
@@ -76,20 +76,20 @@ public class ValueFilter {
     return new ValueNotBetweenAnd<>(FAKE_MEASUREMENT, value1, value2);
   }
 
-  public static ValueLike like(String value) {
-    return new ValueLike(FAKE_MEASUREMENT, value);
+  public static ValueRegexp like(String likePattern) {
+    return regexp(FAKE_MEASUREMENT, parseLikePatternToRegex(likePattern));
   }
 
-  public static ValueNotLike notLike(String value) {
-    return new ValueNotLike(FAKE_MEASUREMENT, value);
+  public static ValueNotRegexp notLike(String likePattern) {
+    return notRegexp(FAKE_MEASUREMENT, parseLikePatternToRegex(likePattern));
   }
 
-  public static ValueRegexp regexp(String value) {
-    return new ValueRegexp(FAKE_MEASUREMENT, value);
+  public static ValueRegexp regexp(String regex) {
+    return new ValueRegexp(FAKE_MEASUREMENT, compileRegex(regex));
   }
 
-  public static ValueNotRegexp notRegexp(String value) {
-    return new ValueNotRegexp(FAKE_MEASUREMENT, value);
+  public static ValueNotRegexp notRegexp(String regex) {
+    return new ValueNotRegexp(FAKE_MEASUREMENT, compileRegex(regex));
   }
 
   public static <T extends Comparable<T>> ValueIn<T> in(Set<T> values) {
@@ -134,20 +134,93 @@ public class ValueFilter {
     return new ValueNotBetweenAnd<>(measurement, value1, value2);
   }
 
-  public static ValueLike like(String measurement, String value) {
-    return new ValueLike(measurement, value);
-  }
-
-  public static ValueNotLike notLike(String measurement, String value) {
-    return new ValueNotLike(measurement, value);
-  }
-
-  public static ValueRegexp regexp(String measurement, String value) {
-    return new ValueRegexp(measurement, value);
-  }
-
-  public static ValueNotRegexp notRegexp(String measurement, String value) {
-    return new ValueNotRegexp(measurement, value);
+  public static ValueRegexp like(String measurement, String likePattern) {
+    return regexp(measurement, parseLikePatternToRegex(likePattern));
+  }
+
+  public static ValueNotRegexp notLike(String measurement, String likePattern) {
+    return notRegexp(measurement, parseLikePatternToRegex(likePattern));
+  }
+
+  public static ValueRegexp regexp(String measurement, String regex) {
+    return new ValueRegexp(measurement, compileRegex(regex));
+  }
+
+  public static ValueNotRegexp notRegexp(String measurement, String regex) {
+    return new ValueNotRegexp(measurement, compileRegex(regex));
+  }
+
+  public static ValueRegexp regexp(String measurement, Pattern pattern) {
+    return new ValueRegexp(measurement, pattern);
+  }
+
+  public static ValueNotRegexp notRegexp(String measurement, Pattern pattern) {
+    return new ValueNotRegexp(measurement, pattern);
+  }
+
+  /**
+   * The main idea of this part comes from
+   * https://codereview.stackexchange.com/questions/36861/convert-sql-like-to-regex/36864
+   */
+  public static String parseLikePatternToRegex(String likePattern) {
+    String unescapeValue = unescapeString(likePattern);
+    String specialRegexStr = ".^$*+?{}[]|()";
+    StringBuilder patternStrBuild = new StringBuilder();
+    patternStrBuild.append("^");
+    for (int i = 0; i < unescapeValue.length(); i++) {
+      String ch = String.valueOf(unescapeValue.charAt(i));
+      if (specialRegexStr.contains(ch)) {
+        ch = "\\" + unescapeValue.charAt(i);
+      }
+      if (i == 0
+          || !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1)))
+          || i >= 2
+              && "\\\\"
+                  .equals(
+                      patternStrBuild.substring(
+                          patternStrBuild.length() - 2, patternStrBuild.length()))) {
+        String replaceStr = ch.replace("%", ".*?").replace("_", ".");
+        patternStrBuild.append(replaceStr);
+      } else {
+        patternStrBuild.append(ch);
+      }
+    }
+    patternStrBuild.append("$");
+    return patternStrBuild.toString();
+  }
+
+  /**
+   * This Method is for un-escaping strings except '\' before special string '%', '_', '\', because
+   * we need to use '\' to judge whether to replace this to regexp string
+   */
+  private static String unescapeString(String value) {
+    StringBuilder stringBuilder = new StringBuilder();
+    int curIndex = 0;
+    for (; curIndex < value.length(); curIndex++) {
+      String ch = String.valueOf(value.charAt(curIndex));
+      if ("\\".equals(ch)) {
+        if (curIndex < value.length() - 1) {
+          String nextChar = String.valueOf(value.charAt(curIndex + 1));
+          if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) {
+            stringBuilder.append(ch);
+          }
+          if ("\\".equals(nextChar)) {
+            curIndex++;
+          }
+        }
+      } else {
+        stringBuilder.append(ch);
+      }
+    }
+    return stringBuilder.toString();
+  }
+
+  public static Pattern compileRegex(String regex) {
+    try {
+      return Pattern.compile(regex);
+    } catch (PatternSyntaxException e) {
+      throw new PatternSyntaxException("Illegal regex expression: ", regex, e.getIndex());
+    }
   }
 
   public static <T extends Comparable<T>> ValueIn<T> in(String measurement, Set<T> values) {
diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java
index 545256d6b1a..bf0f93bb790 100644
--- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java
+++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java
@@ -33,6 +33,7 @@ import org.apache.iotdb.tsfile.read.filter.basic.IValueFilter;
 import java.util.Locale;
 import java.util.Objects;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 /**
  * These are the value column operators in a filter predicate expression tree. They are constructed
@@ -434,19 +435,19 @@ public final class ValueFilterOperators {
     }
   }
 
-  // base class for ValueLike, ValueNotLike, ValueRegex, ValueNotRegex
+  // base class for ValueRegex, ValueNotRegex
   abstract static class ValueColumnPatternMatchFilter extends ColumnPatternMatchFilter
       implements IDisableStatisticsValueFilter {
 
     protected final String measurement;
     private final String toString;
 
-    protected ValueColumnPatternMatchFilter(String measurement, String regex) {
-      super(regex);
+    protected ValueColumnPatternMatchFilter(String measurement, Pattern pattern) {
+      super(pattern);
       this.measurement = Objects.requireNonNull(measurement, "measurement cannot be null");
 
       String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
-      this.toString = name + "(" + measurement + ", " + regex + ")";
+      this.toString = name + "(" + measurement + ", " + pattern + ")";
     }
 
     public String getMeasurement() {
@@ -459,97 +460,10 @@ public final class ValueFilterOperators {
     }
   }
 
-  public static final class ValueLike extends ValueColumnPatternMatchFilter {
-
-    public ValueLike(String measurement, String regex) {
-      super(measurement, parseLikePatternToRegex(regex));
-    }
-
-    @Override
-    public boolean satisfy(long time, Object value) {
-      return pattern.matcher(value.toString()).find();
-    }
-
-    @Override
-    public Filter reverse() {
-      return new ValueNotLike(measurement, pattern.pattern());
-    }
-  }
-
-  public static final class ValueNotLike extends ValueColumnPatternMatchFilter {
-
-    public ValueNotLike(String measurement, String likePattern) {
-      super(measurement, parseLikePatternToRegex(likePattern));
-    }
-
-    @Override
-    public boolean satisfy(long time, Object value) {
-      return !pattern.matcher(value.toString()).find();
-    }
-
-    @Override
-    public Filter reverse() {
-      return new ValueLike(measurement, pattern.pattern());
-    }
-  }
-
-  private static String parseLikePatternToRegex(String value) {
-    String unescapeValue = unescapeString(value);
-    String specialRegexStr = ".^$*+?{}[]|()";
-    StringBuilder patternStrBuild = new StringBuilder();
-    patternStrBuild.append("^");
-    for (int i = 0; i < unescapeValue.length(); i++) {
-      String ch = String.valueOf(unescapeValue.charAt(i));
-      if (specialRegexStr.contains(ch)) {
-        ch = "\\" + unescapeValue.charAt(i);
-      }
-      if ((i == 0)
-          || (i > 0 && !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1))))
-          || (i >= 2
-              && "\\\\"
-                  .equals(
-                      patternStrBuild.substring(
-                          patternStrBuild.length() - 2, patternStrBuild.length())))) {
-        String replaceStr = ch.replace("%", ".*?").replace("_", ".");
-        patternStrBuild.append(replaceStr);
-      } else {
-        patternStrBuild.append(ch);
-      }
-    }
-    patternStrBuild.append("$");
-    return patternStrBuild.toString();
-  }
-
-  /**
-   * This Method is for unescaping strings except '\' before special string '%', '_', '\', because
-   * we need to use '\' to judge whether to replace this to regexp string
-   */
-  private static String unescapeString(String value) {
-    StringBuilder out = new StringBuilder();
-    int curIndex = 0;
-    for (; curIndex < value.length(); curIndex++) {
-      String ch = String.valueOf(value.charAt(curIndex));
-      if ("\\".equals(ch)) {
-        if (curIndex < value.length() - 1) {
-          String nextChar = String.valueOf(value.charAt(curIndex + 1));
-          if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) {
-            out.append(ch);
-          }
-          if ("\\".equals(nextChar)) {
-            curIndex++;
-          }
-        }
-      } else {
-        out.append(ch);
-      }
-    }
-    return out.toString();
-  }
-
   public static final class ValueRegexp extends ValueColumnPatternMatchFilter {
 
-    public ValueRegexp(String measurement, String regex) {
-      super(measurement, regex);
+    public ValueRegexp(String measurement, Pattern pattern) {
+      super(measurement, pattern);
     }
 
     @Override
@@ -559,14 +473,14 @@ public final class ValueFilterOperators {
 
     @Override
     public Filter reverse() {
-      return new ValueNotRegexp(measurement, pattern.pattern());
+      return new ValueNotRegexp(measurement, pattern);
     }
   }
 
   public static final class ValueNotRegexp extends ValueColumnPatternMatchFilter {
 
-    public ValueNotRegexp(String measurement, String regex) {
-      super(measurement, regex);
+    public ValueNotRegexp(String measurement, Pattern pattern) {
+      super(measurement, pattern);
     }
 
     @Override
@@ -576,7 +490,7 @@ public final class ValueFilterOperators {
 
     @Override
     public Filter reverse() {
-      return new ValueRegexp(measurement, pattern.pattern());
+      return new ValueRegexp(measurement, pattern);
     }
   }
 

Reply via email to