This is an automated email from the ASF dual-hosted git repository. hui pushed a commit to branch lmh/refactorFilter in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 789f9a6403dae90f82be930af4252c8e92902f20 Author: Minghui Liu <[email protected]> AuthorDate: Sun Nov 19 22:42:06 2023 +0800 replace like with regex --- .../plan/expression/unary/LikeExpression.java | 63 +---------- .../plan/expression/unary/RegularExpression.java | 6 +- .../ConvertExpressionToFilterVisitor.java | 13 ++- .../filter/basic/ColumnPatternMatchFilter.java | 13 +-- .../tsfile/read/filter/factory/ValueFilter.java | 121 +++++++++++++++++---- .../read/filter/operator/ValueFilterOperators.java | 108 ++---------------- 6 files changed, 127 insertions(+), 197 deletions(-) diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java index 2c08185f5c6..c7f11f1ec46 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/LikeExpression.java @@ -29,6 +29,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.regex.Pattern; +import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.compileRegex; +import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.parseLikePatternToRegex; + public class LikeExpression extends UnaryExpression { private final String patternString; @@ -40,7 +43,7 @@ public class LikeExpression extends UnaryExpression { super(expression); this.patternString = patternString; this.isNot = isNot; - pattern = compile(); + pattern = compileRegex(parseLikePatternToRegex(patternString)); } public LikeExpression( @@ -55,7 +58,7 @@ public class LikeExpression extends UnaryExpression { super(Expression.deserialize(byteBuffer)); patternString = ReadWriteIOUtils.readString(byteBuffer); isNot = ReadWriteIOUtils.readBool(byteBuffer); - pattern = compile(); + pattern = compileRegex(parseLikePatternToRegex(patternString)); } public String getPatternString() { @@ -70,62 +73,6 @@ public class LikeExpression extends UnaryExpression { return isNot; } - /** - * The main idea of this part comes from - * https://codereview.stackexchange.com/questions/36861/convert-sql-like-to-regex/36864 - */ - private Pattern compile() { - String unescapeValue = unescapeString(patternString); - String specialRegexString = ".^$*+?{}[]|()"; - StringBuilder patternBuilder = new StringBuilder(); - patternBuilder.append("^"); - for (int i = 0; i < unescapeValue.length(); i++) { - String ch = String.valueOf(unescapeValue.charAt(i)); - if (specialRegexString.contains(ch)) { - ch = "\\" + unescapeValue.charAt(i); - } - if (i == 0 - || !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1))) - || i >= 2 - && "\\\\" - .equals( - patternBuilder.substring( - patternBuilder.length() - 2, patternBuilder.length()))) { - patternBuilder.append(ch.replace("%", ".*?").replace("_", ".")); - } else { - patternBuilder.append(ch); - } - } - patternBuilder.append("$"); - return Pattern.compile(patternBuilder.toString()); - } - - /** - * This Method is for un-escaping strings except '\' before special string '%', '_', '\', because - * we need to use '\' to judge whether to replace this to regexp string - */ - private String unescapeString(String value) { - StringBuilder stringBuilder = new StringBuilder(); - int curIndex = 0; - for (; curIndex < value.length(); curIndex++) { - String ch = String.valueOf(value.charAt(curIndex)); - if ("\\".equals(ch)) { - if (curIndex < value.length() - 1) { - String nextChar = String.valueOf(value.charAt(curIndex + 1)); - if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) { - stringBuilder.append(ch); - } - if ("\\".equals(nextChar)) { - curIndex++; - } - } - } else { - stringBuilder.append(ch); - } - } - return stringBuilder.toString(); - } - @Override protected String getExpressionStringInternal() { return expression.getExpressionString() + (isNot ? " NOT" : "") + " LIKE '" + pattern + "'"; diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java index 449c2bc21b7..828b4b64aa4 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/unary/RegularExpression.java @@ -31,6 +31,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.regex.Pattern; +import static org.apache.iotdb.tsfile.read.filter.factory.ValueFilter.compileRegex; + public class RegularExpression extends UnaryExpression { private final String patternString; @@ -42,7 +44,7 @@ public class RegularExpression extends UnaryExpression { super(expression); this.patternString = patternString; this.isNot = isNot; - pattern = Pattern.compile(patternString); + pattern = compileRegex(patternString); } public RegularExpression( @@ -57,7 +59,7 @@ public class RegularExpression extends UnaryExpression { super(Expression.deserialize(byteBuffer)); patternString = ReadWriteIOUtils.readString(byteBuffer); isNot = ReadWriteIOUtils.readBool(byteBuffer); - pattern = Pattern.compile(Validate.notNull(patternString)); + pattern = compileRegex(Validate.notNull(patternString, "patternString cannot be null")); } public String getPatternString() { diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java index f174e19427d..d6b2b7329ad 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/expression/visitor/predicate/ConvertExpressionToFilterVisitor.java @@ -47,6 +47,7 @@ import org.apache.iotdb.tsfile.read.filter.factory.ValueFilter; import java.util.LinkedHashSet; import java.util.Set; +import java.util.regex.Pattern; import static org.apache.iotdb.tsfile.read.filter.operator.Not.CONTAIN_NOT_ERR_MSG; @@ -159,11 +160,11 @@ public class ConvertExpressionToFilterVisitor extends PredicateVisitor<Filter, T } String measurement = ((TimeSeriesOperand) expression).getPath().getMeasurement(); - String likePattern = likeExpression.getPatternString(); + Pattern pattern = likeExpression.getPattern(); boolean isNot = likeExpression.isNot(); return isNot - ? ValueFilter.notLike(measurement, likePattern) - : ValueFilter.like(measurement, likePattern); + ? ValueFilter.notRegexp(measurement, pattern) + : ValueFilter.regexp(measurement, pattern); } @Override @@ -174,11 +175,11 @@ public class ConvertExpressionToFilterVisitor extends PredicateVisitor<Filter, T } String measurement = ((TimeSeriesOperand) expression).getPath().getMeasurement(); - String regex = regularExpression.getPatternString(); + Pattern pattern = regularExpression.getPattern(); boolean isNot = regularExpression.isNot(); return isNot - ? ValueFilter.notRegexp(measurement, regex) - : ValueFilter.regexp(measurement, regex); + ? ValueFilter.notRegexp(measurement, pattern) + : ValueFilter.regexp(measurement, pattern); } @Override diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java index 656403e6788..41eb97ae650 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/basic/ColumnPatternMatchFilter.java @@ -21,20 +21,13 @@ package org.apache.iotdb.tsfile.read.filter.basic; import java.util.Objects; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; -/* base class for Like, NotLike, Regex, NotRegex */ +/* base class for Regex, NotRegex */ public abstract class ColumnPatternMatchFilter implements Filter { - protected final String regex; protected final Pattern pattern; - protected ColumnPatternMatchFilter(String regex) { - this.regex = Objects.requireNonNull(regex, "regex cannot be null"); - try { - this.pattern = Pattern.compile(regex); - } catch (PatternSyntaxException e) { - throw new PatternSyntaxException("Regular expression error", regex, e.getIndex()); - } + protected ColumnPatternMatchFilter(Pattern pattern) { + this.pattern = Objects.requireNonNull(pattern, "pattern cannot be null"); } } diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java index bb0c36d77af..c68e226b19d 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/factory/ValueFilter.java @@ -24,17 +24,17 @@ import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueEq import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueGt; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueGtEq; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueIn; -import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLike; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLt; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueLtEq; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotBetweenAnd; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotEq; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotIn; -import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotLike; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueNotRegexp; import org.apache.iotdb.tsfile.read.filter.operator.ValueFilterOperators.ValueRegexp; import java.util.Set; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; public class ValueFilter { @@ -76,20 +76,20 @@ public class ValueFilter { return new ValueNotBetweenAnd<>(FAKE_MEASUREMENT, value1, value2); } - public static ValueLike like(String value) { - return new ValueLike(FAKE_MEASUREMENT, value); + public static ValueRegexp like(String likePattern) { + return regexp(FAKE_MEASUREMENT, parseLikePatternToRegex(likePattern)); } - public static ValueNotLike notLike(String value) { - return new ValueNotLike(FAKE_MEASUREMENT, value); + public static ValueNotRegexp notLike(String likePattern) { + return notRegexp(FAKE_MEASUREMENT, parseLikePatternToRegex(likePattern)); } - public static ValueRegexp regexp(String value) { - return new ValueRegexp(FAKE_MEASUREMENT, value); + public static ValueRegexp regexp(String regex) { + return new ValueRegexp(FAKE_MEASUREMENT, compileRegex(regex)); } - public static ValueNotRegexp notRegexp(String value) { - return new ValueNotRegexp(FAKE_MEASUREMENT, value); + public static ValueNotRegexp notRegexp(String regex) { + return new ValueNotRegexp(FAKE_MEASUREMENT, compileRegex(regex)); } public static <T extends Comparable<T>> ValueIn<T> in(Set<T> values) { @@ -134,20 +134,93 @@ public class ValueFilter { return new ValueNotBetweenAnd<>(measurement, value1, value2); } - public static ValueLike like(String measurement, String value) { - return new ValueLike(measurement, value); - } - - public static ValueNotLike notLike(String measurement, String value) { - return new ValueNotLike(measurement, value); - } - - public static ValueRegexp regexp(String measurement, String value) { - return new ValueRegexp(measurement, value); - } - - public static ValueNotRegexp notRegexp(String measurement, String value) { - return new ValueNotRegexp(measurement, value); + public static ValueRegexp like(String measurement, String likePattern) { + return regexp(measurement, parseLikePatternToRegex(likePattern)); + } + + public static ValueNotRegexp notLike(String measurement, String likePattern) { + return notRegexp(measurement, parseLikePatternToRegex(likePattern)); + } + + public static ValueRegexp regexp(String measurement, String regex) { + return new ValueRegexp(measurement, compileRegex(regex)); + } + + public static ValueNotRegexp notRegexp(String measurement, String regex) { + return new ValueNotRegexp(measurement, compileRegex(regex)); + } + + public static ValueRegexp regexp(String measurement, Pattern pattern) { + return new ValueRegexp(measurement, pattern); + } + + public static ValueNotRegexp notRegexp(String measurement, Pattern pattern) { + return new ValueNotRegexp(measurement, pattern); + } + + /** + * The main idea of this part comes from + * https://codereview.stackexchange.com/questions/36861/convert-sql-like-to-regex/36864 + */ + public static String parseLikePatternToRegex(String likePattern) { + String unescapeValue = unescapeString(likePattern); + String specialRegexStr = ".^$*+?{}[]|()"; + StringBuilder patternStrBuild = new StringBuilder(); + patternStrBuild.append("^"); + for (int i = 0; i < unescapeValue.length(); i++) { + String ch = String.valueOf(unescapeValue.charAt(i)); + if (specialRegexStr.contains(ch)) { + ch = "\\" + unescapeValue.charAt(i); + } + if (i == 0 + || !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1))) + || i >= 2 + && "\\\\" + .equals( + patternStrBuild.substring( + patternStrBuild.length() - 2, patternStrBuild.length()))) { + String replaceStr = ch.replace("%", ".*?").replace("_", "."); + patternStrBuild.append(replaceStr); + } else { + patternStrBuild.append(ch); + } + } + patternStrBuild.append("$"); + return patternStrBuild.toString(); + } + + /** + * This Method is for un-escaping strings except '\' before special string '%', '_', '\', because + * we need to use '\' to judge whether to replace this to regexp string + */ + private static String unescapeString(String value) { + StringBuilder stringBuilder = new StringBuilder(); + int curIndex = 0; + for (; curIndex < value.length(); curIndex++) { + String ch = String.valueOf(value.charAt(curIndex)); + if ("\\".equals(ch)) { + if (curIndex < value.length() - 1) { + String nextChar = String.valueOf(value.charAt(curIndex + 1)); + if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) { + stringBuilder.append(ch); + } + if ("\\".equals(nextChar)) { + curIndex++; + } + } + } else { + stringBuilder.append(ch); + } + } + return stringBuilder.toString(); + } + + public static Pattern compileRegex(String regex) { + try { + return Pattern.compile(regex); + } catch (PatternSyntaxException e) { + throw new PatternSyntaxException("Illegal regex expression: ", regex, e.getIndex()); + } } public static <T extends Comparable<T>> ValueIn<T> in(String measurement, Set<T> values) { diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java index 545256d6b1a..bf0f93bb790 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/filter/operator/ValueFilterOperators.java @@ -33,6 +33,7 @@ import org.apache.iotdb.tsfile.read.filter.basic.IValueFilter; import java.util.Locale; import java.util.Objects; import java.util.Set; +import java.util.regex.Pattern; /** * These are the value column operators in a filter predicate expression tree. They are constructed @@ -434,19 +435,19 @@ public final class ValueFilterOperators { } } - // base class for ValueLike, ValueNotLike, ValueRegex, ValueNotRegex + // base class for ValueRegex, ValueNotRegex abstract static class ValueColumnPatternMatchFilter extends ColumnPatternMatchFilter implements IDisableStatisticsValueFilter { protected final String measurement; private final String toString; - protected ValueColumnPatternMatchFilter(String measurement, String regex) { - super(regex); + protected ValueColumnPatternMatchFilter(String measurement, Pattern pattern) { + super(pattern); this.measurement = Objects.requireNonNull(measurement, "measurement cannot be null"); String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); - this.toString = name + "(" + measurement + ", " + regex + ")"; + this.toString = name + "(" + measurement + ", " + pattern + ")"; } public String getMeasurement() { @@ -459,97 +460,10 @@ public final class ValueFilterOperators { } } - public static final class ValueLike extends ValueColumnPatternMatchFilter { - - public ValueLike(String measurement, String regex) { - super(measurement, parseLikePatternToRegex(regex)); - } - - @Override - public boolean satisfy(long time, Object value) { - return pattern.matcher(value.toString()).find(); - } - - @Override - public Filter reverse() { - return new ValueNotLike(measurement, pattern.pattern()); - } - } - - public static final class ValueNotLike extends ValueColumnPatternMatchFilter { - - public ValueNotLike(String measurement, String likePattern) { - super(measurement, parseLikePatternToRegex(likePattern)); - } - - @Override - public boolean satisfy(long time, Object value) { - return !pattern.matcher(value.toString()).find(); - } - - @Override - public Filter reverse() { - return new ValueLike(measurement, pattern.pattern()); - } - } - - private static String parseLikePatternToRegex(String value) { - String unescapeValue = unescapeString(value); - String specialRegexStr = ".^$*+?{}[]|()"; - StringBuilder patternStrBuild = new StringBuilder(); - patternStrBuild.append("^"); - for (int i = 0; i < unescapeValue.length(); i++) { - String ch = String.valueOf(unescapeValue.charAt(i)); - if (specialRegexStr.contains(ch)) { - ch = "\\" + unescapeValue.charAt(i); - } - if ((i == 0) - || (i > 0 && !"\\".equals(String.valueOf(unescapeValue.charAt(i - 1)))) - || (i >= 2 - && "\\\\" - .equals( - patternStrBuild.substring( - patternStrBuild.length() - 2, patternStrBuild.length())))) { - String replaceStr = ch.replace("%", ".*?").replace("_", "."); - patternStrBuild.append(replaceStr); - } else { - patternStrBuild.append(ch); - } - } - patternStrBuild.append("$"); - return patternStrBuild.toString(); - } - - /** - * This Method is for unescaping strings except '\' before special string '%', '_', '\', because - * we need to use '\' to judge whether to replace this to regexp string - */ - private static String unescapeString(String value) { - StringBuilder out = new StringBuilder(); - int curIndex = 0; - for (; curIndex < value.length(); curIndex++) { - String ch = String.valueOf(value.charAt(curIndex)); - if ("\\".equals(ch)) { - if (curIndex < value.length() - 1) { - String nextChar = String.valueOf(value.charAt(curIndex + 1)); - if ("%".equals(nextChar) || "_".equals(nextChar) || "\\".equals(nextChar)) { - out.append(ch); - } - if ("\\".equals(nextChar)) { - curIndex++; - } - } - } else { - out.append(ch); - } - } - return out.toString(); - } - public static final class ValueRegexp extends ValueColumnPatternMatchFilter { - public ValueRegexp(String measurement, String regex) { - super(measurement, regex); + public ValueRegexp(String measurement, Pattern pattern) { + super(measurement, pattern); } @Override @@ -559,14 +473,14 @@ public final class ValueFilterOperators { @Override public Filter reverse() { - return new ValueNotRegexp(measurement, pattern.pattern()); + return new ValueNotRegexp(measurement, pattern); } } public static final class ValueNotRegexp extends ValueColumnPatternMatchFilter { - public ValueNotRegexp(String measurement, String regex) { - super(measurement, regex); + public ValueNotRegexp(String measurement, Pattern pattern) { + super(measurement, pattern); } @Override @@ -576,7 +490,7 @@ public final class ValueFilterOperators { @Override public Filter reverse() { - return new ValueRegexp(measurement, pattern.pattern()); + return new ValueRegexp(measurement, pattern); } }
