KYLIN-2610 Optimize BuiltInFunctionTransformer performance; KYLIN-2610 fix three percent sign bug
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/b3ba338e Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/b3ba338e Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/b3ba338e Branch: refs/heads/KYLIN-2624 Commit: b3ba338eaf889d42e02870007c1876eb32c884ee Parents: 3976420 Author: Hongbin Ma <mahong...@apache.org> Authored: Thu May 11 20:26:14 2017 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Fri May 12 13:00:43 2017 +0800 ---------------------------------------------------------------------- .../kylin/dict/BuiltInFunctionTransformer.java | 9 +- .../metadata/filter/function/BuiltInMethod.java | 39 +++-- .../metadata/filter/function/LikeMatchers.java | 141 +++++++++++++++++++ 3 files changed, 177 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java b/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java index 6ef2f96..5196593 100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java @@ -57,13 +57,13 @@ public class BuiltInFunctionTransformer implements ITupleFilterTransformer { //normal case translated = translateCompareTupleFilter((CompareTupleFilter) tupleFilter); if (translated != null) { - logger.debug("Translated {{}} to IN clause: {{}}", tupleFilter, translated); + logger.debug("Translated {{}} to IN clause. 
", tupleFilter); } } else if (tupleFilter instanceof BuiltInFunctionTupleFilter) { - //like case + //like,tolower case translated = translateFunctionTupleFilter((BuiltInFunctionTupleFilter) tupleFilter); if (translated != null) { - logger.debug("Translated {{}} to IN clause: {{}}", tupleFilter, translated); + logger.debug("Translated {{}} to IN clause. ", tupleFilter); } } else if (tupleFilter instanceof LogicalTupleFilter) { @SuppressWarnings("unchecked") @@ -99,6 +99,7 @@ public class BuiltInFunctionTransformer implements ITupleFilterTransformer { translated.addChild(new ConstantTupleFilter(dictVal)); } } + logger.debug("getting a in clause with {} children", translated.getChildren().size()); } catch (Exception e) { logger.debug(e.getMessage()); return null; @@ -175,4 +176,4 @@ public class BuiltInFunctionTransformer implements ITupleFilterTransformer { } return translated; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java index 2f28fae..31ee297 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java @@ -18,8 +18,11 @@ package org.apache.kylin.metadata.filter.function; +import static org.apache.kylin.metadata.filter.function.LikeMatchers.LikeMatcher; + import java.lang.reflect.Method; -import java.util.regex.Pattern; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang3.reflect.MethodUtils; @@ -28,9 +31,15 @@ import com.google.common.collect.ImmutableMap; public enum BuiltInMethod { 
UPPER(BuiltInMethod.class, "upper", String.class), LOWER(BuiltInMethod.class, "lower", String.class), SUBSTRING(BuiltInMethod.class, "substring", String.class, int.class, int.class), CHAR_LENGTH(BuiltInMethod.class, "charLength", String.class), LIKE(BuiltInMethod.class, "like", String.class, String.class), INITCAP(BuiltInMethod.class, "initcap", String.class); public final Method method; - public static final ImmutableMap<String, BuiltInMethod> MAP; + private static ThreadLocal<Map<String, LikeMatcher>> likePatterns = new ThreadLocal<Map<String, LikeMatcher>>() { + @Override + public Map<String, LikeMatcher> initialValue() { + return new HashMap<>(); + } + }; + static { final ImmutableMap.Builder<String, BuiltInMethod> builder = ImmutableMap.builder(); for (BuiltInMethod value : BuiltInMethod.values()) { @@ -46,12 +55,26 @@ public enum BuiltInMethod { } /** SQL {@code LIKE} function. */ - public static boolean like(String s, String pattern) { - if (s == null) + public static boolean like(String s, String patternStr) { + //TODO: escape in like is unsupported + //TODO: like is case sensitive now + + if (s == null || patternStr == null) return false; - - final String regex = Like.sqlToRegexLike(pattern, null); - return Pattern.matches(regex, s); + + Map<String, LikeMatcher> patterns = likePatterns.get(); + LikeMatcher p = patterns.get(patternStr); + if (p == null) { + + p = LikeMatchers.createMatcher(patternStr); + + if (patterns.size() > 100) { + patterns.clear();//brutal but good enough + } + patterns.put(patternStr, p); + } + + return p.matches(s); } /** SQL INITCAP(string) function. 
*/ @@ -117,4 +140,4 @@ public enum BuiltInMethod { return s.toLowerCase(); } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java new file mode 100644 index 0000000..ef1e412 --- /dev/null +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kylin.metadata.filter.function; + +import java.util.regex.Pattern; + +import org.apache.commons.lang.StringUtils; + +import com.google.common.base.Preconditions; + +public class LikeMatchers { + + private final static String PERCENT_SIGN = "%"; + private final static String UNDERSCORE_SIGN = "_"; + + public interface LikeMatcher { + boolean matches(String input); + } + + public static class DefaultLikeMatcher implements LikeMatcher { + private Pattern p; + + private DefaultLikeMatcher(String patternStr) { + patternStr = patternStr.toLowerCase(); + final String regex = Like.sqlToRegexLike(patternStr, null); + p = Pattern.compile(regex); + } + + @Override + public boolean matches(String input) { + return p.matcher(input).matches(); + } + } + + // abc%, %abc, abc%def + public static class OnePercentSignLikeMatcher implements LikeMatcher { + + enum SignPosition { + LEFT, MIDDLE, RIGHT + } + + private SignPosition signPosition; + private String[] remaining; + + private OnePercentSignLikeMatcher(String patternStr) { + if (patternStr.startsWith(PERCENT_SIGN)) { + signPosition = SignPosition.LEFT; + } else if (patternStr.endsWith(PERCENT_SIGN)) { + signPosition = SignPosition.RIGHT; + } else { + signPosition = SignPosition.MIDDLE; + } + remaining = StringUtils.split(patternStr, PERCENT_SIGN); + } + + @Override + public boolean matches(String input) { + if (input == null) + return false; + + switch (signPosition) { + case LEFT: + return input.endsWith(remaining[0]); + case RIGHT: + return input.startsWith(remaining[0]); + case MIDDLE: + return input.startsWith(remaining[0]) && input.endsWith(remaining[1]); + default: + throw new IllegalStateException(); + } + } + } + + //only deal with %abc% + public static class TwoPercentSignLikeMatcher implements LikeMatcher { + private String[] remaining; + + private TwoPercentSignLikeMatcher(String patternStr) { + remaining = StringUtils.split(patternStr, PERCENT_SIGN); + 
Preconditions.checkState(remaining.length == 1); + } + + @Override + public boolean matches(String input) { + return input.contains(remaining[0]); + } + } + + //only deal with %abc%def% + public static class ThreePercentSignLikeMatcher implements LikeMatcher { + private String[] remaining; + + private ThreePercentSignLikeMatcher(String patternStr) { + remaining = StringUtils.split(patternStr, PERCENT_SIGN); + Preconditions.checkState(remaining.length == 2); + } + + @Override + public boolean matches(String input) { + int i = input.indexOf(remaining[0]); + int j = input.lastIndexOf(remaining[1]); + return (i != -1) && (j != -1) && (i <= j - remaining[0].length()); + } + } + + public static LikeMatcher createMatcher(String patternStr) { + if (patternStr == null) { + throw new IllegalArgumentException("pattern is null"); + } + + if (patternStr.contains(UNDERSCORE_SIGN)) { + return new DefaultLikeMatcher(patternStr); + } + + int count = StringUtils.countMatches(patternStr, PERCENT_SIGN); + if (count == 1) { + return new OnePercentSignLikeMatcher(patternStr); + } else if (count == 2 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN)) { + return new TwoPercentSignLikeMatcher(patternStr); + } else if (count == 3 && patternStr.startsWith(PERCENT_SIGN) && patternStr.endsWith(PERCENT_SIGN) && !patternStr.contains(PERCENT_SIGN + PERCENT_SIGN)) { + return new ThreePercentSignLikeMatcher(patternStr); + } else { + return new DefaultLikeMatcher(patternStr); + } + } +}